diff --git a/dep/CMakeLists.txt b/dep/CMakeLists.txt index 3846d78c7..b5dc473a7 100644 --- a/dep/CMakeLists.txt +++ b/dep/CMakeLists.txt @@ -15,6 +15,7 @@ add_subdirectory(vulkan) add_subdirectory(soundtouch) add_subdirectory(tinyxml2) add_subdirectory(googletest) +add_subdirectory(cpuinfo) if(ENABLE_CUBEB) add_subdirectory(cubeb) diff --git a/dep/cpuinfo/.gitignore b/dep/cpuinfo/.gitignore new file mode 100644 index 000000000..f069f195f --- /dev/null +++ b/dep/cpuinfo/.gitignore @@ -0,0 +1,21 @@ +# Ninja files +build.ninja + +# Build objects and artifacts +deps/ +build/ +bin/ +lib/ +libs/ +obj/ +*.pyc +*.pyo + +# System files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db diff --git a/dep/cpuinfo/CMakeLists.txt b/dep/cpuinfo/CMakeLists.txt new file mode 100644 index 000000000..e965fe9bb --- /dev/null +++ b/dep/cpuinfo/CMakeLists.txt @@ -0,0 +1,223 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR) + +# ---[ Setup project +PROJECT( + cpuinfo + LANGUAGES C CXX + ) + +# ---[ Options. +SET(CPUINFO_LIBRARY_TYPE "default" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") +SET_PROPERTY(CACHE CPUINFO_LIBRARY_TYPE PROPERTY STRINGS default static shared) +SET(CPUINFO_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared, static, or default) to use") +SET_PROPERTY(CACHE CPUINFO_RUNTIME_TYPE PROPERTY STRINGS default static shared) +SET(CPUINFO_LOG_LEVEL "default" CACHE STRING "Minimum logging level (info with lower severity will be ignored)") +SET_PROPERTY(CACHE CPUINFO_LOG_LEVEL PROPERTY STRINGS default debug info warning error fatal none) + +MACRO(CPUINFO_TARGET_ENABLE_C99 target) + SET_TARGET_PROPERTIES(${target} PROPERTIES + C_STANDARD 99 + C_EXTENSIONS NO) +ENDMACRO() + +MACRO(CPUINFO_TARGET_ENABLE_CXX11 target) + SET_TARGET_PROPERTIES(${target} PROPERTIES + CXX_STANDARD 11 + CXX_EXTENSIONS NO) +ENDMACRO() + +MACRO(CPUINFO_TARGET_RUNTIME_LIBRARY target) + IF(MSVC AND NOT CPUINFO_RUNTIME_TYPE STREQUAL "default") + IF(CPUINFO_RUNTIME_TYPE STREQUAL "shared") + TARGET_COMPILE_OPTIONS(${target} PRIVATE + "/MD$<$:d>") + ELSEIF(CPUINFO_RUNTIME_TYPE STREQUAL "static") + TARGET_COMPILE_OPTIONS(${target} PRIVATE + "/MT$<$:d>") + ENDIF() + ENDIF() +ENDMACRO() + +# -- [ Determine target processor +SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}") +IF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$") + SET(CPUINFO_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}") +ENDIF() + +# ---[ Build flags +SET(CPUINFO_SUPPORTED_PLATFORM TRUE) +IF(NOT CMAKE_SYSTEM_PROCESSOR) + IF(NOT IOS) + MESSAGE(WARNING + "Target processor architecture is not specified. " + "cpuinfo will compile, but cpuinfo_initialize() will always fail.") + SET(CPUINFO_SUPPORTED_PLATFORM FALSE) + ENDIF() +ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$") + MESSAGE(WARNING + "Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. " + "cpuinfo will compile, but cpuinfo_initialize() will always fail.") + SET(CPUINFO_SUPPORTED_PLATFORM FALSE) +ENDIF() + +IF(NOT CMAKE_SYSTEM_NAME) + MESSAGE(WARNING + "Target operating system is not specified. 
" + "cpuinfo will compile, but cpuinfo_initialize() will always fail.") + SET(CPUINFO_SUPPORTED_PLATFORM FALSE) +ELSEIF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS|Darwin|Linux|Android)$") + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") + MESSAGE(WARNING + "Target operating system \"${CMAKE_SYSTEM_NAME}\" is not supported in cpuinfo. " + "cpuinfo will compile, but cpuinfo_initialize() will always fail.") + SET(CPUINFO_SUPPORTED_PLATFORM FALSE) + ENDIF() +ENDIF() + +# ---[ cpuinfo library +SET(CPUINFO_SRCS + src/init.c + src/api.c + src/cache.c) + +IF(CPUINFO_SUPPORTED_PLATFORM) + IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")) + LIST(APPEND CPUINFO_SRCS + src/x86/init.c + src/x86/info.c + src/x86/vendor.c + src/x86/uarch.c + src/x86/name.c + src/x86/topology.c + src/x86/isa.c + src/x86/cache/init.c + src/x86/cache/descriptor.c + src/x86/cache/deterministic.c) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + LIST(APPEND CPUINFO_SRCS + src/x86/linux/init.c + src/x86/linux/cpuinfo.c) + ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") + LIST(APPEND CPUINFO_SRCS src/x86/mach/init.c) + ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$") + LIST(APPEND CPUINFO_SRCS src/x86/windows/init.c) + ENDIF() + ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$") + LIST(APPEND CPUINFO_SRCS + src/arm/uarch.c + src/arm/cache.c) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + LIST(APPEND CPUINFO_SRCS + src/arm/linux/init.c + src/arm/linux/cpuinfo.c + src/arm/linux/clusters.c + src/arm/linux/chipset.c + src/arm/linux/midr.c + src/arm/linux/hwcap.c) + IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]") + LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch32-isa.c) + IF(CMAKE_SYSTEM_NAME STREQUAL "Android" AND ANDROID_ABI STREQUAL "armeabi") + SET_SOURCE_FILES_PROPERTIES(src/arm/linux/aarch32-isa.c PROPERTIES COMPILE_FLAGS -marm) + ENDIF() + ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$") + LIST(APPEND CPUINFO_SRCS src/arm/linux/aarch64-isa.c) + ENDIF() + ELSEIF(IOS OR (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CPUINFO_TARGET_PROCESSOR STREQUAL "arm64")) + LIST(APPEND CPUINFO_SRCS src/arm/mach/init.c) + ENDIF() + IF(CMAKE_SYSTEM_NAME STREQUAL "Android") + LIST(APPEND CPUINFO_SRCS + src/arm/android/properties.c) + ENDIF() + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + LIST(APPEND CPUINFO_SRCS + src/emscripten/init.c) + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + LIST(APPEND CPUINFO_SRCS + src/linux/smallfile.c + src/linux/multiline.c + src/linux/cpulist.c + src/linux/processors.c) + ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") + LIST(APPEND CPUINFO_SRCS src/mach/topology.c) + ENDIF() + + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + SET(CMAKE_THREAD_PREFER_PTHREAD TRUE) + SET(THREADS_PREFER_PTHREAD_FLAG TRUE) + FIND_PACKAGE(Threads REQUIRED) + ENDIF() +ENDIF() + +IF(CPUINFO_LIBRARY_TYPE STREQUAL "default") + ADD_LIBRARY(cpuinfo ${CPUINFO_SRCS}) +ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "shared") + ADD_LIBRARY(cpuinfo SHARED ${CPUINFO_SRCS}) +ELSEIF(CPUINFO_LIBRARY_TYPE STREQUAL "static") + ADD_LIBRARY(cpuinfo STATIC ${CPUINFO_SRCS}) +ELSE() + MESSAGE(FATAL_ERROR 
"Unsupported library type ${CPUINFO_LIBRARY_TYPE}") +ENDIF() +ADD_LIBRARY(cpuinfo_internals STATIC ${CPUINFO_SRCS}) +CPUINFO_TARGET_ENABLE_C99(cpuinfo) +CPUINFO_TARGET_ENABLE_C99(cpuinfo_internals) +CPUINFO_TARGET_RUNTIME_LIBRARY(cpuinfo) +IF(CMAKE_SYSTEM_NAME MATCHES "^(Windows|CYGWIN|MSYS)$") + # Target Windows 7+ API + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _WIN32_WINNT=0x0601) + TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _WIN32_WINNT=0x0601) +ENDIF() +SET_TARGET_PROPERTIES(cpuinfo PROPERTIES PUBLIC_HEADER include/cpuinfo.h) +TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PUBLIC include) +TARGET_INCLUDE_DIRECTORIES(cpuinfo BEFORE PRIVATE src) +TARGET_INCLUDE_DIRECTORIES(cpuinfo_internals BEFORE PUBLIC include src) +IF(CPUINFO_LOG_LEVEL STREQUAL "default") + # default logging level: error (subject to change) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) +ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "debug") + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=5) +ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "info") + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=4) +ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "warning") + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=3) +ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "error") + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=2) +ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "fatal") + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=1) +ELSEIF(CPUINFO_LOG_LEVEL STREQUAL "none") + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE CPUINFO_LOG_LEVEL=0) +ELSE() + MESSAGE(FATAL_ERROR "Unsupported logging level ${CPUINFO_LOG_LEVEL}") +ENDIF() +TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE CPUINFO_LOG_LEVEL=0) + +IF(CPUINFO_SUPPORTED_PLATFORM) + TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=1) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") + TARGET_LINK_LIBRARIES(cpuinfo PUBLIC ${CMAKE_THREAD_LIBS_INIT}) + TARGET_LINK_LIBRARIES(cpuinfo_internals PUBLIC ${CMAKE_THREAD_LIBS_INIT}) + TARGET_COMPILE_DEFINITIONS(cpuinfo PRIVATE _GNU_SOURCE=1) + TARGET_COMPILE_DEFINITIONS(cpuinfo_internals PRIVATE _GNU_SOURCE=1) + ENDIF() +ELSE() + TARGET_COMPILE_DEFINITIONS(cpuinfo INTERFACE CPUINFO_SUPPORTED_PLATFORM=0) +ENDIF() + +# ---[ cpuinfo dependencies: clog +IF(NOT DEFINED CLOG_SOURCE_DIR) + SET(CLOG_SOURCE_DIR "${PROJECT_SOURCE_DIR}/deps/clog") +ENDIF() +IF(NOT TARGET clog) + SET(CLOG_BUILD_TESTS OFF CACHE BOOL "") + SET(CLOG_RUNTIME_TYPE "${CPUINFO_RUNTIME_TYPE}" CACHE STRING "") + ADD_SUBDIRECTORY( + "${CLOG_SOURCE_DIR}") + # We build static version of clog but a dynamic library may indirectly depend on it + SET_PROPERTY(TARGET clog PROPERTY POSITION_INDEPENDENT_CODE ON) +ENDIF() +TARGET_LINK_LIBRARIES(cpuinfo PRIVATE clog) +TARGET_LINK_LIBRARIES(cpuinfo_internals PRIVATE clog) \ No newline at end of file diff --git a/dep/cpuinfo/LICENSE b/dep/cpuinfo/LICENSE new file mode 100644 index 000000000..3f9a4f020 --- /dev/null +++ b/dep/cpuinfo/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2019 Google LLC +Copyright (c) 2017-2018 Facebook Inc. +Copyright (C) 2012-2017 Georgia Institute of Technology +Copyright (C) 2010-2012 Marat Dukhan + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/dep/cpuinfo/README.md b/dep/cpuinfo/README.md new file mode 100644 index 000000000..0eb71a5d6 --- /dev/null +++ b/dep/cpuinfo/README.md @@ -0,0 +1,283 @@ +# CPU INFOrmation library + +[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/LICENSE) +[![Linux/Mac build status](https://img.shields.io/travis/pytorch/cpuinfo.svg)](https://travis-ci.org/pytorch/cpuinfo) +[![Windows build status](https://ci.appveyor.com/api/projects/status/g5khy9nr0xm458t7/branch/master?svg=true)](https://ci.appveyor.com/project/MaratDukhan/cpuinfo/branch/master) + +cpuinfo is a library to detect essential for performance optimization information about host CPU. + +## Features + +- **Cross-platform** availability: + - Linux, Windows, macOS, Android, and iOS operating systems + - x86, x86-64, ARM, and ARM64 architectures +- Modern **C/C++ interface** + - Thread-safe + - No memory allocation after initialization + - No exceptions thrown +- Detection of **supported instruction sets**, up to AVX512 (x86) and ARMv8.3 extensions +- Detection of SoC and core information: + - **Processor (SoC) name** + - Vendor and **microarchitecture** for each CPU core + - ID (**MIDR** on ARM, **CPUID** leaf 1 EAX value on x86) for each CPU core +- Detection of **cache information**: + - Cache type (instruction/data/unified), size and line size + - Cache associativity + - Cores and logical processors (hyper-threads) sharing the cache +- Detection of **topology information** (relative between logical processors, cores, and processor packages) +- Well-tested **production-quality** code: + - 60+ mock tests based on data from real devices + - Includes work-arounds for common bugs in hardware and OS kernels + - Supports systems with heterogenous cores, such as **big.LITTLE** and Max.Med.Min +- Permissive **open-source** license (Simplified BSD) + +## Examples + +Log processor name: + +```c +cpuinfo_initialize(); +printf("Running on %s CPU\n", cpuinfo_get_package(0)->name); +``` + +Detect if target is a 32-bit or 64-bit ARM system: + +```c +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /* 32-bit ARM-specific code here */ +#endif +``` + +Check if the host CPU support ARM NEON + +```c +cpuinfo_initialize(); +if (cpuinfo_has_arm_neon()) { + neon_implementation(arguments); +} +``` + +Check if the host CPU supports x86 AVX + +```c +cpuinfo_initialize(); +if (cpuinfo_has_x86_avx()) { + avx_implementation(arguments); +} +``` + +Check if the thread runs on a 
Cortex-A53 core + +```c +cpuinfo_initialize(); +switch (cpuinfo_get_current_core()->uarch) { + case cpuinfo_uarch_cortex_a53: + cortex_a53_implementation(arguments); + break; + default: + generic_implementation(arguments); + break; +} +``` + +Get the size of level 1 data cache on the fastest core in the processor (e.g. big core in big.LITTLE ARM systems): + +```c +cpuinfo_initialize(); +const size_t l1_size = cpuinfo_get_processor(0)->cache.l1d->size; +``` + +Pin thread to cores sharing L2 cache with the current core (Linux or Android) + +```c +cpuinfo_initialize(); +cpu_set_t cpu_set; +CPU_ZERO(&cpu_set); +const struct cpuinfo_cache* current_l2 = cpuinfo_get_current_processor()->cache.l2; +for (uint32_t i = 0; i < current_l2->processor_count; i++) { + CPU_SET(cpuinfo_get_processor(current_l2->processor_start + i)->linux_id, &cpu_set); +} +pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_set); +``` + +## Use via pkg-config + +If you would like to provide your project's build environment with the necessary compiler and linker flags in a portable manner, the library by default when built enables `CPUINFO_BUILD_PKG_CONFIG` and will generate a [pkg-config](https://www.freedesktop.org/wiki/Software/pkg-config/) manifest (_libcpuinfo.pc_). Here are several examples of how to use it: + +### Command Line + +If you used your distro's package manager to install the library, you can verify that it is available to your build environment like so: + +```console +$ pkg-config --cflags --libs libcpuinfo +-I/usr/include/x86_64-linux-gnu/ -L/lib/x86_64-linux-gnu/ -lcpuinfo +``` + +If you have installed the library from source into a non-standard prefix, pkg-config may need help finding it: + +```console +$ PKG_CONFIG_PATH="/home/me/projects/cpuinfo/prefix/lib/pkgconfig/:$PKG_CONFIG_PATH" pkg-config --cflags --libs libcpuinfo +-I/home/me/projects/cpuinfo/prefix/include -L/home/me/projects/cpuinfo/prefix/lib -lcpuinfo +``` + +### GNU Autotools + +To [use](https://autotools.io/pkgconfig/pkg_check_modules.html) with the GNU Autotools include the following snippet in your project's `configure.ac`: + +```makefile +# CPU INFOrmation library... +PKG_CHECK_MODULES( + [libcpuinfo], [libcpuinfo], [], + [AC_MSG_ERROR([libcpuinfo missing...])]) +YOURPROJECT_CXXFLAGS="$YOURPROJECT_CXXFLAGS $libcpuinfo_CFLAGS" +YOURPROJECT_LIBS="$YOURPROJECT_LIBS $libcpuinfo_LIBS" +``` + +### Meson + +To use with Meson you just need to add `dependency('libcpuinfo')` as a dependency for your executable. + +```meson +project( + 'MyCpuInfoProject', + 'cpp', + meson_version: '>=0.55.0' +) + +executable( + 'MyCpuInfoExecutable', + sources: 'main.cpp', + dependencies: dependency('libcpuinfo') +) +``` + +### CMake + +To use with CMake use the [FindPkgConfig](https://cmake.org/cmake/help/latest/module/FindPkgConfig.html) module. Here is an example: + +```cmake +cmake_minimum_required(VERSION 3.6) +project("MyCpuInfoProject") + +find_package(PkgConfig) +pkg_check_modules(CpuInfo REQUIRED IMPORTED_TARGET libcpuinfo) + +add_executable(${PROJECT_NAME} main.cpp) +target_link_libraries(${PROJECT_NAME} PkgConfig::CpuInfo) +``` + +### Makefile + +To use within a vanilla makefile, you can call pkg-config directly to supply compiler and linker flags using shell substitution. + +```makefile +CFLAGS=-g3 -Wall -Wextra -Werror ... +LDFLAGS=-lfoo ... +... 
+CFLAGS+= $(pkg-config --cflags libcpuinfo) +LDFLAGS+= $(pkg-config --libs libcpuinfo) +``` + +## Exposed information +- [x] Processor (SoC) name +- [x] Microarchitecture +- [x] Usable instruction sets +- [ ] CPU frequency +- [x] Cache + - [x] Size + - [x] Associativity + - [x] Line size + - [x] Number of partitions + - [x] Flags (unified, inclusive, complex hash function) + - [x] Topology (logical processors that share this cache level) +- [ ] TLB + - [ ] Number of entries + - [ ] Associativity + - [ ] Covered page types (instruction, data) + - [ ] Covered page sizes +- [x] Topology information + - [x] Logical processors + - [x] Cores + - [x] Packages (sockets) + +## Supported environments: +- [x] Android + - [x] x86 ABI + - [x] x86_64 ABI + - [x] armeabi ABI + - [x] armeabiv7-a ABI + - [x] arm64-v8a ABI + - [ ] ~~mips ABI~~ + - [ ] ~~mips64 ABI~~ +- [x] Linux + - [x] x86 + - [x] x86-64 + - [x] 32-bit ARM (ARMv5T and later) + - [x] ARM64 + - [ ] PowerPC64 +- [x] iOS + - [x] x86 (iPhone simulator) + - [x] x86-64 (iPhone simulator) + - [x] ARMv7 + - [x] ARM64 +- [x] OS X + - [x] x86 + - [x] x86-64 +- [x] Windows + - [x] x86 + - [x] x86-64 + +## Methods + +- Processor (SoC) name detection + - [x] Using CPUID leaves 0x80000002–0x80000004 on x86/x86-64 + - [x] Using `/proc/cpuinfo` on ARM + - [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android) + - [ ] Using kernel log (`dmesg`) on ARM Linux +- Vendor and microarchitecture detection + - [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill) + - [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2) + - [ ] VIA-designed x86/x86-64 cores + - [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise) + - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1) + - [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo) + - [x] Nvidia-designed ARM cores (Denver and Carmel) + - [x] Samsung-designed ARM cores (Exynos) + - [x] Intel-designed ARM cores (XScale up to 3rd-gen) + - [x] Apple-designed ARM cores (up to Lightning and Thunder) + - [x] Cavium-designed ARM cores (ThunderX) + - [x] AppliedMicro-designed ARM cores (X-Gene) +- Instruction set detection + - [x] Using CPUID (x86/x86-64) + - [x] Using `/proc/cpuinfo` on 32-bit ARM EABI (Linux) + - [x] Using microarchitecture heuristics on (32-bit ARM) + - [x] Using `FPSID` and `WCID` registers (32-bit ARM) + - [x] Using `getauxval` (Linux/ARM) + - [x] Using `/proc/self/auxv` (Android/ARM) + - [ ] Using instruction probing on ARM (Linux) + - [ ] Using CPUID registers on ARM64 (Linux) +- Cache detection + - [x] Using CPUID leaf 0x00000002 (x86/x86-64) + - [x] Using CPUID leaf 0x00000004 (non-AMD x86/x86-64) + - [ ] Using CPUID leaves 0x80000005-0x80000006 (AMD x86/x86-64) + - [x] Using CPUID leaf 0x8000001D (AMD x86/x86-64) + - [x] Using `/proc/cpuinfo` (Linux/pre-ARMv7) + - [x] Using microarchitecture heuristics (ARM) + - [x] Using chipset name (ARM) + - [x] Using `sysctlbyname` (Mach) + - [x] Using sysfs `typology` directories (ARM/Linux) + - [ ] Using sysfs `cache` directories (Linux) +- TLB detection + - [x] Using CPUID leaf 0x00000002 (x86/x86-64) + - [ ] Using CPUID leaves 0x80000005-0x80000006 and 0x80000019 (AMD x86/x86-64) + - [x] Using microarchitecture heuristics (ARM) +- Topology detection + - [x] Using CPUID leaf 0x00000001 on x86/x86-64 (legacy APIC ID) + - [x] Using CPUID leaf 0x0000000B on x86/x86-64 (Intel APIC ID) + - [ ] Using CPUID leaf 0x8000001E on 
x86/x86-64 (AMD APIC ID) + - [x] Using `/proc/cpuinfo` (Linux) + - [x] Using `host_info` (Mach) + - [x] Using `GetLogicalProcessorInformationEx` (Windows) + - [x] Using sysfs (Linux) + - [x] Using chipset name (ARM/Linux) + diff --git a/dep/cpuinfo/cpuinfo.vcxproj b/dep/cpuinfo/cpuinfo.vcxproj new file mode 100644 index 000000000..2aa279f60 --- /dev/null +++ b/dep/cpuinfo/cpuinfo.vcxproj @@ -0,0 +1,79 @@ + + + + + + + + true + + + true + + + true + + + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + true + + + + + + + + + + + + + + + + + + {EE55AA65-EA6B-4861-810B-78354B53A807} + + + + + TurnOffAllWarnings + $(ProjectDir)include;$(ProjectDir)src;$(ProjectDir)deps\clog\include;%(AdditionalIncludeDirectories) + stdcpp14 + $(IntDir)%(RelativeDir) + + + + \ No newline at end of file diff --git a/dep/cpuinfo/cpuinfo.vcxproj.filters b/dep/cpuinfo/cpuinfo.vcxproj.filters new file mode 100644 index 000000000..f2e4f2b20 --- /dev/null +++ b/dep/cpuinfo/cpuinfo.vcxproj.filters @@ -0,0 +1,112 @@ + + + + + {8fc9f543-ff04-48fb-ae1a-7c575a8aed13} + + + {0b540baa-aafb-4e51-8cbf-b7e7c00d9a4d} + + + {53ef3c40-8e03-46d1-aeb3-6446c40469da} + + + {26002d26-399a-41bb-93cb-42fb9be21c1f} + + + {7f0aba4c-ca06-4a7b-aed1-4f1e6976e839} + + + {f8cee8f2-6ab7-47cf-a5fb-3ae5e444000c} + + + {cca5126a-b401-4925-b163-d2e64b010c7b} + + + + + x86 + + + x86 + + + x86 + + + x86 + + + x86 + + + x86 + + + x86 + + + x86\windows + + + x86\descriptor + + + x86\descriptor + + + x86\descriptor + + + + + + clog + + + arm + + + arm + + + arm\windows + + + + + x86 + + + x86 + + + x86\windows + + + cpuinfo + + + cpuinfo + + + cpuinfo + + + cpuinfo + + + + clog + + + arm + + + arm + + + arm\windows + + + \ No newline at end of file diff --git a/dep/cpuinfo/deps/clog/.gitignore b/dep/cpuinfo/deps/clog/.gitignore new file mode 100644 index 000000000..73b299889 --- /dev/null +++ b/dep/cpuinfo/deps/clog/.gitignore @@ -0,0 +1,19 @@ +# Ninja files +build.ninja + +# Build objects and artifacts +deps/ +build/ +bin/ +lib/ +*.pyc +*.pyo + +# System files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db diff --git a/dep/cpuinfo/deps/clog/CMakeLists.txt b/dep/cpuinfo/deps/clog/CMakeLists.txt new file mode 100644 index 000000000..0e65a1b8b --- /dev/null +++ b/dep/cpuinfo/deps/clog/CMakeLists.txt @@ -0,0 +1,42 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 3.1 FATAL_ERROR) + +# ---[ Project and semantic versioning. +PROJECT(clog C CXX) + +# ---[ Options. 
+SET(CLOG_RUNTIME_TYPE "default" CACHE STRING "Type of runtime library (shared, static, or default) to use") +SET_PROPERTY(CACHE CLOG_RUNTIME_TYPE PROPERTY STRINGS default static shared) +IF(ANDROID) + OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" OFF) +ELSE() + OPTION(CLOG_LOG_TO_STDIO "Log errors, warnings, and information to stdout/stderr" ON) +ENDIF() + +MACRO(CLOG_TARGET_RUNTIME_LIBRARY target) + IF(MSVC AND NOT CLOG_RUNTIME_TYPE STREQUAL "default") + IF(CLOG_RUNTIME_TYPE STREQUAL "shared") + TARGET_COMPILE_OPTIONS(${target} PRIVATE + "/MD$<$:d>") + ELSEIF(CLOG_RUNTIME_TYPE STREQUAL "static") + TARGET_COMPILE_OPTIONS(${target} PRIVATE + "/MT$<$:d>") + ENDIF() + ENDIF() +ENDMACRO() + +# ---[ clog library +ADD_LIBRARY(clog STATIC src/clog.c) +SET_TARGET_PROPERTIES(clog PROPERTIES + C_STANDARD 99 + C_EXTENSIONS NO) +CLOG_TARGET_RUNTIME_LIBRARY(clog) +SET_TARGET_PROPERTIES(clog PROPERTIES PUBLIC_HEADER include/clog.h) +TARGET_INCLUDE_DIRECTORIES(clog BEFORE PUBLIC include) +IF(CLOG_LOG_TO_STDIO) + TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=1) +ELSE() + TARGET_COMPILE_DEFINITIONS(clog PRIVATE CLOG_LOG_TO_STDIO=0) +ENDIF() +IF(ANDROID AND NOT CLOG_LOG_TO_STDIO) + TARGET_LINK_LIBRARIES(clog PRIVATE log) +ENDIF() \ No newline at end of file diff --git a/dep/cpuinfo/deps/clog/LICENSE b/dep/cpuinfo/deps/clog/LICENSE new file mode 100644 index 000000000..306de3d8f --- /dev/null +++ b/dep/cpuinfo/deps/clog/LICENSE @@ -0,0 +1,26 @@ +Copyright (C) 2018 Marat Dukhan +Copyright (c) 2017-2018 Facebook Inc. +Copyright (c) 2017 Georgia Institute of Technology + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/dep/cpuinfo/deps/clog/README.md b/dep/cpuinfo/deps/clog/README.md new file mode 100644 index 000000000..17fc70931 --- /dev/null +++ b/dep/cpuinfo/deps/clog/README.md @@ -0,0 +1,57 @@ +# clog: C-style (a-la printf) logging library + +[![BSD (2 clause) License](https://img.shields.io/badge/License-BSD%202--Clause%20%22Simplified%22%20License-blue.svg)](https://github.com/pytorch/cpuinfo/blob/master/deps/clog/LICENSE) + +C-style library for logging errors, warnings, information notes, and debug information. + +## Features + +- printf-style interface for formatting variadic parameters. 
+- Separate functions for logging errors, warnings, information notes, and debug information. +- Independent logging settings for different modules. +- Logging to logcat on Android and stderr/stdout on other platforms. +- Compatible with C99 and C++. +- Covered with unit tests. + +## Example + +```c +#include + +#ifndef MYMODULE_LOG_LEVEL + #define MYMODULE_LOG_LEVEL CLOG_DEBUG +#endif + +CLOG_DEFINE_LOG_DEBUG(mymodule_, "My Module", MYMODULE_LOG_LEVEL); +CLOG_DEFINE_LOG_INFO(mymodule_, "My Module", MYMODULE_LOG_LEVEL); +CLOG_DEFINE_LOG_WARNING(mymodule_, "My Module", MYMODULE_LOG_LEVEL); +CLOG_DEFINE_LOG_ERROR(mymodule_, "My Module", MYMODULE_LOG_LEVEL); + +... + +void some_function(...) { + int status = ... + if (status != 0) { + mymodule_log_error( + "something really bad happened: " + "operation failed with status %d", status); + } + + uint32_t expected_zero = ... + if (expected_zero != 0) { + mymodule_log_warning( + "something suspicious happened (var = %"PRIu32"), " + "fall back to generic implementation", expected_zero); + } + + void* usually_non_null = ... + if (usually_non_null == NULL) { + mymodule_log_info( + "something unusual, but common, happened: " + "enabling work-around"); + } + + float a = ... + mymodule_log_debug("computed a = %.7f", a); +} +``` diff --git a/dep/cpuinfo/deps/clog/include/clog.h b/dep/cpuinfo/deps/clog/include/clog.h new file mode 100644 index 000000000..414376116 --- /dev/null +++ b/dep/cpuinfo/deps/clog/include/clog.h @@ -0,0 +1,100 @@ +#pragma once + +#include +#include +#include + +#define CLOG_NONE 0 +#define CLOG_FATAL 1 +#define CLOG_ERROR 2 +#define CLOG_WARNING 3 +#define CLOG_INFO 4 +#define CLOG_DEBUG 5 + +#ifndef CLOG_VISIBILITY + #if defined(__ELF__) + #define CLOG_VISIBILITY __attribute__((__visibility__("internal"))) + #elif defined(__MACH__) + #define CLOG_VISIBILITY __attribute__((__visibility__("hidden"))) + #else + #define CLOG_VISIBILITY + #endif +#endif + +#ifndef CLOG_ARGUMENTS_FORMAT + #if defined(__GNUC__) + #define CLOG_ARGUMENTS_FORMAT __attribute__((__format__(__printf__, 1, 2))) + #else + #define CLOG_ARGUMENTS_FORMAT + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +CLOG_VISIBILITY void clog_vlog_debug(const char* module, const char* format, va_list args); +CLOG_VISIBILITY void clog_vlog_info(const char* module, const char* format, va_list args); +CLOG_VISIBILITY void clog_vlog_warning(const char* module, const char* format, va_list args); +CLOG_VISIBILITY void clog_vlog_error(const char* module, const char* format, va_list args); +CLOG_VISIBILITY void clog_vlog_fatal(const char* module, const char* format, va_list args); + +#define CLOG_DEFINE_LOG_DEBUG(log_debug_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_debug_function_name(const char* format, ...) { \ + if (level >= CLOG_DEBUG) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_debug(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_INFO(log_info_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_info_function_name(const char* format, ...) { \ + if (level >= CLOG_INFO) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_info(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_WARNING(log_warning_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_warning_function_name(const char* format, ...) 
{ \ + if (level >= CLOG_WARNING) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_warning(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_ERROR(log_error_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_error_function_name(const char* format, ...) { \ + if (level >= CLOG_ERROR) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_error(module, format, args); \ + va_end(args); \ + } \ + } + +#define CLOG_DEFINE_LOG_FATAL(log_fatal_function_name, module, level) \ + CLOG_ARGUMENTS_FORMAT \ + inline static void log_fatal_function_name(const char* format, ...) { \ + if (level >= CLOG_FATAL) { \ + va_list args; \ + va_start(args, format); \ + clog_vlog_fatal(module, format, args); \ + va_end(args); \ + } \ + abort(); \ + } + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/dep/cpuinfo/deps/clog/src/clog.c b/dep/cpuinfo/deps/clog/src/clog.c new file mode 100644 index 000000000..fe5d43e1f --- /dev/null +++ b/dep/cpuinfo/deps/clog/src/clog.c @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#ifdef _WIN32 + #include +#else + #include +#endif +#ifdef __ANDROID__ + #include +#endif + +#ifndef CLOG_LOG_TO_STDIO + #ifdef __ANDROID__ + #define CLOG_LOG_TO_STDIO 0 + #else + #define CLOG_LOG_TO_STDIO 1 + #endif +#endif + +#include + + +/* Messages up to this size are formatted entirely on-stack, and don't allocate heap memory */ +#define CLOG_STACK_BUFFER_SIZE 1024 + +#define CLOG_FATAL_PREFIX "Fatal error: " +#define CLOG_FATAL_PREFIX_LENGTH 13 +#define CLOG_FATAL_PREFIX_FORMAT "Fatal error in %s: " +#define CLOG_ERROR_PREFIX "Error: " +#define CLOG_ERROR_PREFIX_LENGTH 7 +#define CLOG_ERROR_PREFIX_FORMAT "Error in %s: " +#define CLOG_WARNING_PREFIX "Warning: " +#define CLOG_WARNING_PREFIX_LENGTH 9 +#define CLOG_WARNING_PREFIX_FORMAT "Warning in %s: " +#define CLOG_INFO_PREFIX "Note: " +#define CLOG_INFO_PREFIX_LENGTH 6 +#define CLOG_INFO_PREFIX_FORMAT "Note (%s): " +#define CLOG_DEBUG_PREFIX "Debug: " +#define CLOG_DEBUG_PREFIX_LENGTH 7 +#define CLOG_DEBUG_PREFIX_FORMAT "Debug (%s): " +#define CLOG_SUFFIX_LENGTH 1 + +void clog_vlog_fatal(const char* module, const char* format, va_list args) { + #if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO + __android_log_vprint(ANDROID_LOG_FATAL, module, format, args); + #else + char stack_buffer[CLOG_STACK_BUFFER_SIZE]; + char* heap_buffer = NULL; + char* out_buffer = &stack_buffer[0]; + + /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */ + va_list args_copy; + va_copy(args_copy, args); + + int prefix_chars = CLOG_FATAL_PREFIX_LENGTH; + if (module == NULL) { + memcpy(stack_buffer, CLOG_FATAL_PREFIX, CLOG_FATAL_PREFIX_LENGTH); + } else { + prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_FATAL_PREFIX_FORMAT, module); + if (prefix_chars < 0) { + /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */ + prefix_chars = 0; + } + } + + int format_chars; + if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) { + /* + * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer. + * Do not even try to format the string into on-stack buffer. 
+ */ + format_chars = vsnprintf(NULL, 0, format, args); + } else { + format_chars = + vsnprintf( + &stack_buffer[prefix_chars], + CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH, + format, + args); + } + if (format_chars < 0) { + /* Format error in the message: silently ignore this particular message. */ + goto cleanup; + } + if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH > CLOG_STACK_BUFFER_SIZE) { + /* Allocate a buffer on heap, and vsnprintf to this buffer */ + heap_buffer = malloc(prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + if (heap_buffer == NULL) { + goto cleanup; + } + + if (prefix_chars > CLOG_STACK_BUFFER_SIZE) { + /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */ + snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_FATAL_PREFIX_FORMAT, module); + } else { + /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */ + memcpy(heap_buffer, stack_buffer, prefix_chars); + } + vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy); + out_buffer = heap_buffer; + } + out_buffer[prefix_chars + format_chars] = '\n'; + #ifdef _WIN32 + DWORD bytes_written; + WriteFile( + GetStdHandle(STD_ERROR_HANDLE), + out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, + &bytes_written, NULL); + #else + write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + #endif + +cleanup: + free(heap_buffer); + va_end(args_copy); + #endif +} + +void clog_vlog_error(const char* module, const char* format, va_list args) { + #if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO + __android_log_vprint(ANDROID_LOG_ERROR, module, format, args); + #else + char stack_buffer[CLOG_STACK_BUFFER_SIZE]; + char* heap_buffer = NULL; + char* out_buffer = &stack_buffer[0]; + + /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */ + va_list args_copy; + va_copy(args_copy, args); + + int prefix_chars = CLOG_ERROR_PREFIX_LENGTH; + if (module == NULL) { + memcpy(stack_buffer, CLOG_ERROR_PREFIX, CLOG_ERROR_PREFIX_LENGTH); + } else { + prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_ERROR_PREFIX_FORMAT, module); + if (prefix_chars < 0) { + /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */ + prefix_chars = 0; + } + } + + int format_chars; + if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) { + /* + * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer. + * Do not even try to format the string into on-stack buffer. + */ + format_chars = vsnprintf(NULL, 0, format, args); + } else { + format_chars = + vsnprintf( + &stack_buffer[prefix_chars], + CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH, + format, + args); + } + if (format_chars < 0) { + /* Format error in the message: silently ignore this particular message. 
*/ + goto cleanup; + } + if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH > CLOG_STACK_BUFFER_SIZE) { + /* Allocate a buffer on heap, and vsnprintf to this buffer */ + heap_buffer = malloc(prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + if (heap_buffer == NULL) { + goto cleanup; + } + + if (prefix_chars > CLOG_STACK_BUFFER_SIZE) { + /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */ + snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_ERROR_PREFIX_FORMAT, module); + } else { + /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */ + memcpy(heap_buffer, stack_buffer, prefix_chars); + } + vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy); + out_buffer = heap_buffer; + } + out_buffer[prefix_chars + format_chars] = '\n'; + #ifdef _WIN32 + DWORD bytes_written; + WriteFile( + GetStdHandle(STD_ERROR_HANDLE), + out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, + &bytes_written, NULL); + #else + write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + #endif + +cleanup: + free(heap_buffer); + va_end(args_copy); + #endif +} + +void clog_vlog_warning(const char* module, const char* format, va_list args) { + #if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO + __android_log_vprint(ANDROID_LOG_WARN, module, format, args); + #else + char stack_buffer[CLOG_STACK_BUFFER_SIZE]; + char* heap_buffer = NULL; + char* out_buffer = &stack_buffer[0]; + + /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */ + va_list args_copy; + va_copy(args_copy, args); + + int prefix_chars = CLOG_WARNING_PREFIX_LENGTH; + if (module == NULL) { + memcpy(stack_buffer, CLOG_WARNING_PREFIX, CLOG_WARNING_PREFIX_LENGTH); + } else { + prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_WARNING_PREFIX_FORMAT, module); + if (prefix_chars < 0) { + /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */ + prefix_chars = 0; + } + } + + int format_chars; + if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) { + /* + * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer. + * Do not even try to format the string into on-stack buffer. + */ + format_chars = vsnprintf(NULL, 0, format, args); + } else { + format_chars = + vsnprintf( + &stack_buffer[prefix_chars], + CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH, + format, + args); + } + if (format_chars < 0) { + /* Format error in the message: silently ignore this particular message. 
*/ + goto cleanup; + } + if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH > CLOG_STACK_BUFFER_SIZE) { + /* Allocate a buffer on heap, and vsnprintf to this buffer */ + heap_buffer = malloc(prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + if (heap_buffer == NULL) { + goto cleanup; + } + + if (prefix_chars > CLOG_STACK_BUFFER_SIZE) { + /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */ + snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_WARNING_PREFIX_FORMAT, module); + } else { + /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */ + memcpy(heap_buffer, stack_buffer, prefix_chars); + } + vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy); + out_buffer = heap_buffer; + } + out_buffer[prefix_chars + format_chars] = '\n'; + #ifdef _WIN32 + DWORD bytes_written; + WriteFile( + GetStdHandle(STD_ERROR_HANDLE), + out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, + &bytes_written, NULL); + #else + write(STDERR_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + #endif + +cleanup: + free(heap_buffer); + va_end(args_copy); + #endif +} + +void clog_vlog_info(const char* module, const char* format, va_list args) { + #if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO + __android_log_vprint(ANDROID_LOG_INFO, module, format, args); + #else + char stack_buffer[CLOG_STACK_BUFFER_SIZE]; + char* heap_buffer = NULL; + char* out_buffer = &stack_buffer[0]; + + /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */ + va_list args_copy; + va_copy(args_copy, args); + + int prefix_chars = CLOG_INFO_PREFIX_LENGTH; + if (module == NULL) { + memcpy(stack_buffer, CLOG_INFO_PREFIX, CLOG_INFO_PREFIX_LENGTH); + } else { + prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_INFO_PREFIX_FORMAT, module); + if (prefix_chars < 0) { + /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */ + prefix_chars = 0; + } + } + + int format_chars; + if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) { + /* + * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer. + * Do not even try to format the string into on-stack buffer. + */ + format_chars = vsnprintf(NULL, 0, format, args); + } else { + format_chars = + vsnprintf( + &stack_buffer[prefix_chars], + CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH, + format, + args); + } + if (format_chars < 0) { + /* Format error in the message: silently ignore this particular message. 
*/ + goto cleanup; + } + if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH > CLOG_STACK_BUFFER_SIZE) { + /* Allocate a buffer on heap, and vsnprintf to this buffer */ + heap_buffer = malloc(prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + if (heap_buffer == NULL) { + goto cleanup; + } + + if (prefix_chars > CLOG_STACK_BUFFER_SIZE) { + /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */ + snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_INFO_PREFIX_FORMAT, module); + } else { + /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */ + memcpy(heap_buffer, stack_buffer, prefix_chars); + } + vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy); + out_buffer = heap_buffer; + } + out_buffer[prefix_chars + format_chars] = '\n'; + #ifdef _WIN32 + DWORD bytes_written; + WriteFile( + GetStdHandle(STD_OUTPUT_HANDLE), + out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, + &bytes_written, NULL); + #else + write(STDOUT_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + #endif + +cleanup: + free(heap_buffer); + va_end(args_copy); + #endif +} + +void clog_vlog_debug(const char* module, const char* format, va_list args) { + #if defined(__ANDROID__) && !CLOG_LOG_TO_STDIO + __android_log_vprint(ANDROID_LOG_DEBUG, module, format, args); + #else + char stack_buffer[CLOG_STACK_BUFFER_SIZE]; + char* heap_buffer = NULL; + char* out_buffer = &stack_buffer[0]; + + /* The first call to vsnprintf will clobber args, thus need a copy in case a second vsnprintf call is needed */ + va_list args_copy; + va_copy(args_copy, args); + + int prefix_chars = CLOG_DEBUG_PREFIX_LENGTH; + if (module == NULL) { + memcpy(stack_buffer, CLOG_DEBUG_PREFIX, CLOG_DEBUG_PREFIX_LENGTH); + } else { + prefix_chars = snprintf(stack_buffer, CLOG_STACK_BUFFER_SIZE, CLOG_DEBUG_PREFIX_FORMAT, module); + if (prefix_chars < 0) { + /* Format error in prefix (possible if prefix is modified): skip prefix and continue as if nothing happened. */ + prefix_chars = 0; + } + } + + int format_chars; + if (prefix_chars + CLOG_SUFFIX_LENGTH >= CLOG_STACK_BUFFER_SIZE) { + /* + * Prefix + suffix alone would overflow the on-stack buffer, thus need to use on-heap buffer. + * Do not even try to format the string into on-stack buffer. + */ + format_chars = vsnprintf(NULL, 0, format, args); + } else { + format_chars = + vsnprintf( + &stack_buffer[prefix_chars], + CLOG_STACK_BUFFER_SIZE - prefix_chars - CLOG_SUFFIX_LENGTH, + format, + args); + } + if (format_chars < 0) { + /* Format error in the message: silently ignore this particular message. 
*/ + goto cleanup; + } + if (prefix_chars + format_chars + CLOG_SUFFIX_LENGTH > CLOG_STACK_BUFFER_SIZE) { + /* Allocate a buffer on heap, and vsnprintf to this buffer */ + heap_buffer = malloc(prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + if (heap_buffer == NULL) { + goto cleanup; + } + + if (prefix_chars > CLOG_STACK_BUFFER_SIZE) { + /* Prefix didn't fit into on-stack buffer, re-format it again to on-heap buffer */ + snprintf(heap_buffer, prefix_chars + 1 /* for '\0'-terminator */, CLOG_DEBUG_PREFIX_FORMAT, module); + } else { + /* Copy pre-formatted prefix from on-stack buffer to on-heap buffer */ + memcpy(heap_buffer, stack_buffer, prefix_chars); + } + vsnprintf(heap_buffer + prefix_chars, format_chars + CLOG_SUFFIX_LENGTH, format, args_copy); + out_buffer = heap_buffer; + } + out_buffer[prefix_chars + format_chars] = '\n'; + #ifdef _WIN32 + DWORD bytes_written; + WriteFile( + GetStdHandle(STD_OUTPUT_HANDLE), + out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH, + &bytes_written, NULL); + #else + write(STDOUT_FILENO, out_buffer, prefix_chars + format_chars + CLOG_SUFFIX_LENGTH); + #endif + +cleanup: + free(heap_buffer); + va_end(args_copy); + #endif +} diff --git a/dep/cpuinfo/include/cpuinfo-mock.h b/dep/cpuinfo/include/cpuinfo-mock.h new file mode 100644 index 000000000..3c1f637d9 --- /dev/null +++ b/dep/cpuinfo/include/cpuinfo-mock.h @@ -0,0 +1,78 @@ +#pragma once +#ifndef CPUINFO_MOCK_H +#define CPUINFO_MOCK_H + +#include +#include + +#include +#if defined(__linux__) + #include +#endif + +#if !defined(CPUINFO_MOCK) || !(CPUINFO_MOCK) + #error This header is intended only for test use +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if CPUINFO_ARCH_ARM + void CPUINFO_ABI cpuinfo_set_fpsid(uint32_t fpsid); + void CPUINFO_ABI cpuinfo_set_wcid(uint32_t wcid); +#endif /* CPUINFO_ARCH_ARM */ + +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + struct cpuinfo_mock_cpuid { + uint32_t input_eax; + uint32_t input_ecx; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + }; + + void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries); + void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]); + void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]); +#endif /* CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 */ + +struct cpuinfo_mock_file { + const char* path; + size_t size; + const char* content; + size_t offset; +}; + +struct cpuinfo_mock_property { + const char* key; + const char* value; +}; + +#if defined(__linux__) + void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files); + int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag); + int CPUINFO_ABI cpuinfo_mock_close(int fd); + ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity); + + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); + #endif + #if CPUINFO_ARCH_ARM + void CPUINFO_ABI cpuinfo_set_hwcap2(uint32_t hwcap2); + #endif +#endif + +#if defined(__ANDROID__) + void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties); + void CPUINFO_ABI cpuinfo_mock_gl_renderer(const char* renderer); +#endif + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* CPUINFO_MOCK_H */ diff --git a/dep/cpuinfo/include/cpuinfo.h b/dep/cpuinfo/include/cpuinfo.h new file mode 100644 index 000000000..cffa299ed --- /dev/null +++ b/dep/cpuinfo/include/cpuinfo.h @@ -0,0 +1,1872 @@ +#pragma once +#ifndef CPUINFO_H 
+#define CPUINFO_H + +#ifndef __cplusplus + #include +#endif + +#ifdef __APPLE__ + #include +#endif + +#include + +/* Identify architecture and define corresponding macro */ + +#if defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86) + #define CPUINFO_ARCH_X86 1 +#endif + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + #define CPUINFO_ARCH_X86_64 1 +#endif + +#if defined(__arm__) || defined(_M_ARM) + #define CPUINFO_ARCH_ARM 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) + #define CPUINFO_ARCH_ARM64 1 +#endif + +#if defined(__PPC64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) + #define CPUINFO_ARCH_PPC64 1 +#endif + +#if defined(__asmjs__) + #define CPUINFO_ARCH_ASMJS 1 +#endif + +#if defined(__wasm__) + #if defined(__wasm_simd128__) + #define CPUINFO_ARCH_WASMSIMD 1 + #else + #define CPUINFO_ARCH_WASM 1 + #endif +#endif + +/* Define other architecture-specific macros as 0 */ + +#ifndef CPUINFO_ARCH_X86 + #define CPUINFO_ARCH_X86 0 +#endif + +#ifndef CPUINFO_ARCH_X86_64 + #define CPUINFO_ARCH_X86_64 0 +#endif + +#ifndef CPUINFO_ARCH_ARM + #define CPUINFO_ARCH_ARM 0 +#endif + +#ifndef CPUINFO_ARCH_ARM64 + #define CPUINFO_ARCH_ARM64 0 +#endif + +#ifndef CPUINFO_ARCH_PPC64 + #define CPUINFO_ARCH_PPC64 0 +#endif + +#ifndef CPUINFO_ARCH_ASMJS + #define CPUINFO_ARCH_ASMJS 0 +#endif + +#ifndef CPUINFO_ARCH_WASM + #define CPUINFO_ARCH_WASM 0 +#endif + +#ifndef CPUINFO_ARCH_WASMSIMD + #define CPUINFO_ARCH_WASMSIMD 0 +#endif + +#if CPUINFO_ARCH_X86 && defined(_MSC_VER) + #define CPUINFO_ABI __cdecl +#elif CPUINFO_ARCH_X86 && defined(__GNUC__) + #define CPUINFO_ABI __attribute__((__cdecl__)) +#else + #define CPUINFO_ABI +#endif + +#define CPUINFO_CACHE_UNIFIED 0x00000001 +#define CPUINFO_CACHE_INCLUSIVE 0x00000002 +#define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004 + +struct cpuinfo_cache { + /** Cache size in bytes */ + uint32_t size; + /** Number of ways of associativity */ + uint32_t associativity; + /** Number of sets */ + uint32_t sets; + /** Number of partitions */ + uint32_t partitions; + /** Line size in bytes */ + uint32_t line_size; + /** + * Binary characteristics of the cache (unified cache, inclusive cache, cache with complex indexing). + * + * @see CPUINFO_CACHE_UNIFIED, CPUINFO_CACHE_INCLUSIVE, CPUINFO_CACHE_COMPLEX_INDEXING + */ + uint32_t flags; + /** Index of the first logical processor that shares this cache */ + uint32_t processor_start; + /** Number of logical processors that share this cache */ + uint32_t processor_count; +}; + +struct cpuinfo_trace_cache { + uint32_t uops; + uint32_t associativity; +}; + +#define CPUINFO_PAGE_SIZE_4KB 0x1000 +#define CPUINFO_PAGE_SIZE_1MB 0x100000 +#define CPUINFO_PAGE_SIZE_2MB 0x200000 +#define CPUINFO_PAGE_SIZE_4MB 0x400000 +#define CPUINFO_PAGE_SIZE_16MB 0x1000000 +#define CPUINFO_PAGE_SIZE_1GB 0x40000000 + +struct cpuinfo_tlb { + uint32_t entries; + uint32_t associativity; + uint64_t pages; +}; + +/** Vendor of processor core design */ +enum cpuinfo_vendor { + /** Processor vendor is not known to the library, or the library failed to get vendor information from the OS. */ + cpuinfo_vendor_unknown = 0, + + /* Active vendors of modern CPUs */ + + /** + * Intel Corporation. Vendor of x86, x86-64, IA64, and ARM processor microarchitectures. + * + * Sold its ARM design subsidiary in 2006. The last ARM processor design was released in 2004. + */ + cpuinfo_vendor_intel = 1, + /** Advanced Micro Devices, Inc. 
Vendor of x86 and x86-64 processor microarchitectures. */ + cpuinfo_vendor_amd = 2, + /** ARM Holdings plc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_arm = 3, + /** Qualcomm Incorporated. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_qualcomm = 4, + /** Apple Inc. Vendor of ARM and ARM64 processor microarchitectures. */ + cpuinfo_vendor_apple = 5, + /** Samsung Electronics Co., Ltd. Vendir if ARM64 processor microarchitectures. */ + cpuinfo_vendor_samsung = 6, + /** Nvidia Corporation. Vendor of ARM64-compatible processor microarchitectures. */ + cpuinfo_vendor_nvidia = 7, + /** MIPS Technologies, Inc. Vendor of MIPS processor microarchitectures. */ + cpuinfo_vendor_mips = 8, + /** International Business Machines Corporation. Vendor of PowerPC processor microarchitectures. */ + cpuinfo_vendor_ibm = 9, + /** Ingenic Semiconductor. Vendor of MIPS processor microarchitectures. */ + cpuinfo_vendor_ingenic = 10, + /** + * VIA Technologies, Inc. Vendor of x86 and x86-64 processor microarchitectures. + * + * Processors are designed by Centaur Technology, a subsidiary of VIA Technologies. + */ + cpuinfo_vendor_via = 11, + /** Cavium, Inc. Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_cavium = 12, + /** Broadcom, Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_broadcom = 13, + /** Applied Micro Circuits Corporation (APM). Vendor of ARM64 processor microarchitectures. */ + cpuinfo_vendor_apm = 14, + /** + * Huawei Technologies Co., Ltd. Vendor of ARM64 processor microarchitectures. + * + * Processors are designed by HiSilicon, a subsidiary of Huawei. + */ + cpuinfo_vendor_huawei = 15, + /** + * Hygon (Chengdu Haiguang Integrated Circuit Design Co., Ltd), Vendor of x86-64 processor microarchitectures. + * + * Processors are variants of AMD cores. + */ + cpuinfo_vendor_hygon = 16, + + /* Active vendors of embedded CPUs */ + + /** Texas Instruments Inc. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_texas_instruments = 30, + /** Marvell Technology Group Ltd. Vendor of ARM processor microarchitectures. */ + cpuinfo_vendor_marvell = 31, + /** RDC Semiconductor Co., Ltd. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_rdc = 32, + /** DM&P Electronics Inc. Vendor of x86 processor microarchitectures. */ + cpuinfo_vendor_dmp = 33, + /** Motorola, Inc. Vendor of PowerPC and ARM processor microarchitectures. */ + cpuinfo_vendor_motorola = 34, + + /* Defunct CPU vendors */ + + /** + * Transmeta Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 2004. + * Transmeta processors implemented VLIW ISA and used binary translation to execute x86 code. + */ + cpuinfo_vendor_transmeta = 50, + /** + * Cyrix Corporation. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1996. + */ + cpuinfo_vendor_cyrix = 51, + /** + * Rise Technology. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1999. + */ + cpuinfo_vendor_rise = 52, + /** + * National Semiconductor. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 1999. The last processor design was released in 1998. + */ + cpuinfo_vendor_nsc = 53, + /** + * Silicon Integrated Systems. Vendor of x86 processor microarchitectures. + * + * Sold its x86 design subsidiary in 2001. The last processor design was released in 2001. 
+ */ + cpuinfo_vendor_sis = 54, + /** + * NexGen. Vendor of x86 processor microarchitectures. + * + * Now defunct. The last processor design was released in 1994. + * NexGen designed the first x86 microarchitecture which decomposed x86 instructions into simple microoperations. + */ + cpuinfo_vendor_nexgen = 55, + /** + * United Microelectronics Corporation. Vendor of x86 processor microarchitectures. + * + * Ceased x86 in the early 1990s. The last processor design was released in 1991. + * Designed U5C and U5D processors. Both are 486 level. + */ + cpuinfo_vendor_umc = 56, + /** + * Digital Equipment Corporation. Vendor of ARM processor microarchitecture. + * + * Sold its ARM designs in 1997. The last processor design was released in 1997. + */ + cpuinfo_vendor_dec = 57, +}; + +/** + * Processor microarchitecture + * + * Processors with different microarchitectures often have different instruction performance characteristics, + * and may have dramatically different pipeline organization. + */ +enum cpuinfo_uarch { + /** Microarchitecture is unknown, or the library failed to get information about the microarchitecture from OS */ + cpuinfo_uarch_unknown = 0, + + /** Pentium and Pentium MMX microarchitecture. */ + cpuinfo_uarch_p5 = 0x00100100, + /** Intel Quark microarchitecture. */ + cpuinfo_uarch_quark = 0x00100101, + + /** Pentium Pro, Pentium II, and Pentium III. */ + cpuinfo_uarch_p6 = 0x00100200, + /** Pentium M. */ + cpuinfo_uarch_dothan = 0x00100201, + /** Intel Core microarchitecture. */ + cpuinfo_uarch_yonah = 0x00100202, + /** Intel Core 2 microarchitecture on 65 nm process. */ + cpuinfo_uarch_conroe = 0x00100203, + /** Intel Core 2 microarchitecture on 45 nm process. */ + cpuinfo_uarch_penryn = 0x00100204, + /** Intel Nehalem and Westmere microarchitectures (Core i3/i5/i7 1st gen). */ + cpuinfo_uarch_nehalem = 0x00100205, + /** Intel Sandy Bridge microarchitecture (Core i3/i5/i7 2nd gen). */ + cpuinfo_uarch_sandy_bridge = 0x00100206, + /** Intel Ivy Bridge microarchitecture (Core i3/i5/i7 3rd gen). */ + cpuinfo_uarch_ivy_bridge = 0x00100207, + /** Intel Haswell microarchitecture (Core i3/i5/i7 4th gen). */ + cpuinfo_uarch_haswell = 0x00100208, + /** Intel Broadwell microarchitecture. */ + cpuinfo_uarch_broadwell = 0x00100209, + /** Intel Sky Lake microarchitecture (14 nm, including Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */ + cpuinfo_uarch_sky_lake = 0x0010020A, + /** DEPRECATED (Intel Kaby Lake microarchitecture). */ + cpuinfo_uarch_kaby_lake = 0x0010020A, + /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */ + cpuinfo_uarch_palm_cove = 0x0010020B, + /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */ + cpuinfo_uarch_sunny_cove = 0x0010020C, + + /** Pentium 4 with Willamette, Northwood, or Foster cores. */ + cpuinfo_uarch_willamette = 0x00100300, + /** Pentium 4 with Prescott and later cores. */ + cpuinfo_uarch_prescott = 0x00100301, + + /** Intel Atom on 45 nm process. */ + cpuinfo_uarch_bonnell = 0x00100400, + /** Intel Atom on 32 nm process. */ + cpuinfo_uarch_saltwell = 0x00100401, + /** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */ + cpuinfo_uarch_silvermont = 0x00100402, + /** Intel Airmont microarchitecture (14 nm out-of-order Atom). */ + cpuinfo_uarch_airmont = 0x00100403, + /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */ + cpuinfo_uarch_goldmont = 0x00100404, + /** Intel Goldmont Plus microarchitecture (Gemini Lake). 
*/ + cpuinfo_uarch_goldmont_plus = 0x00100405, + + /** Intel Knights Ferry HPC boards. */ + cpuinfo_uarch_knights_ferry = 0x00100500, + /** Intel Knights Corner HPC boards (aka Xeon Phi). */ + cpuinfo_uarch_knights_corner = 0x00100501, + /** Intel Knights Landing microarchitecture (second-gen MIC). */ + cpuinfo_uarch_knights_landing = 0x00100502, + /** Intel Knights Hill microarchitecture (third-gen MIC). */ + cpuinfo_uarch_knights_hill = 0x00100503, + /** Intel Knights Mill Xeon Phi. */ + cpuinfo_uarch_knights_mill = 0x00100504, + + /** Intel/Marvell XScale series. */ + cpuinfo_uarch_xscale = 0x00100600, + + /** AMD K5. */ + cpuinfo_uarch_k5 = 0x00200100, + /** AMD K6 and alike. */ + cpuinfo_uarch_k6 = 0x00200101, + /** AMD Athlon and Duron. */ + cpuinfo_uarch_k7 = 0x00200102, + /** AMD Athlon 64, Opteron 64. */ + cpuinfo_uarch_k8 = 0x00200103, + /** AMD Family 10h (Barcelona, Istambul, Magny-Cours). */ + cpuinfo_uarch_k10 = 0x00200104, + /** + * AMD Bulldozer microarchitecture + * Zambezi FX-series CPUs, Zurich, Valencia and Interlagos Opteron CPUs. + */ + cpuinfo_uarch_bulldozer = 0x00200105, + /** + * AMD Piledriver microarchitecture + * Vishera FX-series CPUs, Trinity and Richland APUs, Delhi, Seoul, Abu Dhabi Opteron CPUs. + */ + cpuinfo_uarch_piledriver = 0x00200106, + /** AMD Steamroller microarchitecture (Kaveri APUs). */ + cpuinfo_uarch_steamroller = 0x00200107, + /** AMD Excavator microarchitecture (Carizzo APUs). */ + cpuinfo_uarch_excavator = 0x00200108, + /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen = 0x00200109, + /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */ + cpuinfo_uarch_zen2 = 0x0020010A, + /** AMD Zen 3 microarchitecture. */ + cpuinfo_uarch_zen3 = 0x0020010B, + + /** NSC Geode and AMD Geode GX and LX. */ + cpuinfo_uarch_geode = 0x00200200, + /** AMD Bobcat mobile microarchitecture. */ + cpuinfo_uarch_bobcat = 0x00200201, + /** AMD Jaguar mobile microarchitecture. */ + cpuinfo_uarch_jaguar = 0x00200202, + /** AMD Puma mobile microarchitecture. */ + cpuinfo_uarch_puma = 0x00200203, + + /** ARM7 series. */ + cpuinfo_uarch_arm7 = 0x00300100, + /** ARM9 series. */ + cpuinfo_uarch_arm9 = 0x00300101, + /** ARM 1136, ARM 1156, ARM 1176, or ARM 11MPCore. */ + cpuinfo_uarch_arm11 = 0x00300102, + + /** ARM Cortex-A5. */ + cpuinfo_uarch_cortex_a5 = 0x00300205, + /** ARM Cortex-A7. */ + cpuinfo_uarch_cortex_a7 = 0x00300207, + /** ARM Cortex-A8. */ + cpuinfo_uarch_cortex_a8 = 0x00300208, + /** ARM Cortex-A9. */ + cpuinfo_uarch_cortex_a9 = 0x00300209, + /** ARM Cortex-A12. */ + cpuinfo_uarch_cortex_a12 = 0x00300212, + /** ARM Cortex-A15. */ + cpuinfo_uarch_cortex_a15 = 0x00300215, + /** ARM Cortex-A17. */ + cpuinfo_uarch_cortex_a17 = 0x00300217, + + /** ARM Cortex-A32. */ + cpuinfo_uarch_cortex_a32 = 0x00300332, + /** ARM Cortex-A35. */ + cpuinfo_uarch_cortex_a35 = 0x00300335, + /** ARM Cortex-A53. */ + cpuinfo_uarch_cortex_a53 = 0x00300353, + /** ARM Cortex-A55 revision 0 (restricted dual-issue capabilities compared to revision 1+). */ + cpuinfo_uarch_cortex_a55r0 = 0x00300354, + /** ARM Cortex-A55. */ + cpuinfo_uarch_cortex_a55 = 0x00300355, + /** ARM Cortex-A57. */ + cpuinfo_uarch_cortex_a57 = 0x00300357, + /** ARM Cortex-A65. */ + cpuinfo_uarch_cortex_a65 = 0x00300365, + /** ARM Cortex-A72. */ + cpuinfo_uarch_cortex_a72 = 0x00300372, + /** ARM Cortex-A73. */ + cpuinfo_uarch_cortex_a73 = 0x00300373, + /** ARM Cortex-A75. */ + cpuinfo_uarch_cortex_a75 = 0x00300375, + /** ARM Cortex-A76. 
*/ + cpuinfo_uarch_cortex_a76 = 0x00300376, + /** ARM Cortex-A77. */ + cpuinfo_uarch_cortex_a77 = 0x00300377, + /** ARM Cortex-A78. */ + cpuinfo_uarch_cortex_a78 = 0x00300378, + + /** ARM Neoverse N1. */ + cpuinfo_uarch_neoverse_n1 = 0x00300400, + /** ARM Neoverse E1. */ + cpuinfo_uarch_neoverse_e1 = 0x00300401, + + /** ARM Cortex-X1. */ + cpuinfo_uarch_cortex_x1 = 0x00300500, + + /** Qualcomm Scorpion. */ + cpuinfo_uarch_scorpion = 0x00400100, + /** Qualcomm Krait. */ + cpuinfo_uarch_krait = 0x00400101, + /** Qualcomm Kryo. */ + cpuinfo_uarch_kryo = 0x00400102, + /** Qualcomm Falkor. */ + cpuinfo_uarch_falkor = 0x00400103, + /** Qualcomm Saphira. */ + cpuinfo_uarch_saphira = 0x00400104, + + /** Nvidia Denver. */ + cpuinfo_uarch_denver = 0x00500100, + /** Nvidia Denver 2. */ + cpuinfo_uarch_denver2 = 0x00500101, + /** Nvidia Carmel. */ + cpuinfo_uarch_carmel = 0x00500102, + + /** Samsung Exynos M1 (Exynos 8890 big cores). */ + cpuinfo_uarch_exynos_m1 = 0x00600100, + /** Samsung Exynos M2 (Exynos 8895 big cores). */ + cpuinfo_uarch_exynos_m2 = 0x00600101, + /** Samsung Exynos M3 (Exynos 9810 big cores). */ + cpuinfo_uarch_exynos_m3 = 0x00600102, + /** Samsung Exynos M4 (Exynos 9820 big cores). */ + cpuinfo_uarch_exynos_m4 = 0x00600103, + /** Samsung Exynos M5 (Exynos 9830 big cores). */ + cpuinfo_uarch_exynos_m5 = 0x00600104, + + /* Deprecated synonym for Cortex-A76 */ + cpuinfo_uarch_cortex_a76ae = 0x00300376, + /* Deprecated names for Exynos. */ + cpuinfo_uarch_mongoose_m1 = 0x00600100, + cpuinfo_uarch_mongoose_m2 = 0x00600101, + cpuinfo_uarch_meerkat_m3 = 0x00600102, + cpuinfo_uarch_meerkat_m4 = 0x00600103, + + /** Apple A6 and A6X processors. */ + cpuinfo_uarch_swift = 0x00700100, + /** Apple A7 processor. */ + cpuinfo_uarch_cyclone = 0x00700101, + /** Apple A8 and A8X processor. */ + cpuinfo_uarch_typhoon = 0x00700102, + /** Apple A9 and A9X processor. */ + cpuinfo_uarch_twister = 0x00700103, + /** Apple A10 and A10X processor. */ + cpuinfo_uarch_hurricane = 0x00700104, + /** Apple A11 processor (big cores). */ + cpuinfo_uarch_monsoon = 0x00700105, + /** Apple A11 processor (little cores). */ + cpuinfo_uarch_mistral = 0x00700106, + /** Apple A12 processor (big cores). */ + cpuinfo_uarch_vortex = 0x00700107, + /** Apple A12 processor (little cores). */ + cpuinfo_uarch_tempest = 0x00700108, + /** Apple A13 processor (big cores). */ + cpuinfo_uarch_lightning = 0x00700109, + /** Apple A13 processor (little cores). */ + cpuinfo_uarch_thunder = 0x0070010A, + /** Apple M1 processor (big cores). */ + cpuinfo_uarch_firestorm = 0x0070010B, + /** Apple M1 processor (little cores). */ + cpuinfo_uarch_icestorm = 0x0070010C, + + /** Cavium ThunderX. */ + cpuinfo_uarch_thunderx = 0x00800100, + /** Cavium ThunderX2 (originally Broadcom Vulkan). */ + cpuinfo_uarch_thunderx2 = 0x00800200, + + /** Marvell PJ4. */ + cpuinfo_uarch_pj4 = 0x00900100, + + /** Broadcom Brahma B15. */ + cpuinfo_uarch_brahma_b15 = 0x00A00100, + /** Broadcom Brahma B53. */ + cpuinfo_uarch_brahma_b53 = 0x00A00101, + + /** Applied Micro X-Gene. */ + cpuinfo_uarch_xgene = 0x00B00100, + + /* Hygon Dhyana (a modification of AMD Zen for Chinese market). */ + cpuinfo_uarch_dhyana = 0x01000100, + + /** HiSilicon TaiShan v110 (Huawei Kunpeng 920 series processors). 
*/ + cpuinfo_uarch_taishan_v110 = 0x00C00100, +}; + +struct cpuinfo_processor { + /** SMT (hyperthread) ID within a core */ + uint32_t smt_id; + /** Core containing this logical processor */ + const struct cpuinfo_core* core; + /** Cluster of cores containing this logical processor */ + const struct cpuinfo_cluster* cluster; + /** Physical package containing this logical processor */ + const struct cpuinfo_package* package; +#if defined(__linux__) + /** + * Linux-specific ID for the logical processor: + * - Linux kernel exposes information about this logical processor in /sys/devices/system/cpu/cpu/ + * - Bit in the cpu_set_t identifies this logical processor + */ + int linux_id; +#endif +#if defined(_WIN32) || defined(__CYGWIN__) + /** Windows-specific ID for the group containing the logical processor. */ + uint16_t windows_group_id; + /** + * Windows-specific ID of the logical processor within its group: + * - Bit in the KAFFINITY mask identifies this logical processor within its group. + */ + uint16_t windows_processor_id; +#endif +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** APIC ID (unique x86-specific ID of the logical processor) */ + uint32_t apic_id; +#endif + struct { + /** Level 1 instruction cache */ + const struct cpuinfo_cache* l1i; + /** Level 1 data cache */ + const struct cpuinfo_cache* l1d; + /** Level 2 unified or data cache */ + const struct cpuinfo_cache* l2; + /** Level 3 unified or data cache */ + const struct cpuinfo_cache* l3; + /** Level 4 unified or data cache */ + const struct cpuinfo_cache* l4; + } cache; +}; + +struct cpuinfo_core { + /** Index of the first logical processor on this core. */ + uint32_t processor_start; + /** Number of logical processors on this core */ + uint32_t processor_count; + /** Core ID within a package */ + uint32_t core_id; + /** Cluster containing this core */ + const struct cpuinfo_cluster* cluster; + /** Physical package containing this core. 
*/ + const struct cpuinfo_package* package; + /** Vendor of the CPU microarchitecture for this core */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture for this core */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for this core */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for this core */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the core, in Hz */ + uint64_t frequency; +}; + +struct cpuinfo_cluster { + /** Index of the first logical processor in the cluster */ + uint32_t processor_start; + /** Number of logical processors in the cluster */ + uint32_t processor_count; + /** Index of the first core in the cluster */ + uint32_t core_start; + /** Number of cores on the cluster */ + uint32_t core_count; + /** Cluster ID within a package */ + uint32_t cluster_id; + /** Physical package containing the cluster */ + const struct cpuinfo_package* package; + /** CPU microarchitecture vendor of the cores in the cluster */ + enum cpuinfo_vendor vendor; + /** CPU microarchitecture of the cores in the cluster */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register of the cores in the cluster */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) of the cores in the cluster */ + uint32_t midr; +#endif + /** Clock rate (non-Turbo) of the cores in the cluster, in Hz */ + uint64_t frequency; +}; + +#define CPUINFO_PACKAGE_NAME_MAX 48 + +struct cpuinfo_package { + /** SoC or processor chip model name */ + char name[CPUINFO_PACKAGE_NAME_MAX]; + /** Index of the first logical processor on this physical package */ + uint32_t processor_start; + /** Number of logical processors on this physical package */ + uint32_t processor_count; + /** Index of the first core on this physical package */ + uint32_t core_start; + /** Number of cores on this physical package */ + uint32_t core_count; + /** Index of the first cluster of cores on this physical package */ + uint32_t cluster_start; + /** Number of clusters of cores on this physical package */ + uint32_t cluster_count; +}; + +struct cpuinfo_uarch_info { + /** Type of CPU microarchitecture */ + enum cpuinfo_uarch uarch; +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /** Value of CPUID leaf 1 EAX register for the microarchitecture */ + uint32_t cpuid; +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /** Value of Main ID Register (MIDR) for the microarchitecture */ + uint32_t midr; +#endif + /** Number of logical processors with the microarchitecture */ + uint32_t processor_count; + /** Number of cores with the microarchitecture */ + uint32_t core_count; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +bool CPUINFO_ABI cpuinfo_initialize(void); + +void CPUINFO_ABI cpuinfo_deinitialize(void); + +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + /* This structure is not a part of stable API. Use cpuinfo_has_x86_* functions instead. 
*/ + struct cpuinfo_x86_isa { + #if CPUINFO_ARCH_X86 + bool rdtsc; + #endif + bool rdtscp; + bool rdpid; + bool sysenter; + #if CPUINFO_ARCH_X86 + bool syscall; + #endif + bool msr; + bool clzero; + bool clflush; + bool clflushopt; + bool mwait; + bool mwaitx; + #if CPUINFO_ARCH_X86 + bool emmx; + #endif + bool fxsave; + bool xsave; + #if CPUINFO_ARCH_X86 + bool fpu; + bool mmx; + bool mmx_plus; + #endif + bool three_d_now; + bool three_d_now_plus; + #if CPUINFO_ARCH_X86 + bool three_d_now_geode; + #endif + bool prefetch; + bool prefetchw; + bool prefetchwt1; + #if CPUINFO_ARCH_X86 + bool daz; + bool sse; + bool sse2; + #endif + bool sse3; + bool ssse3; + bool sse4_1; + bool sse4_2; + bool sse4a; + bool misaligned_sse; + bool avx; + bool fma3; + bool fma4; + bool xop; + bool f16c; + bool avx2; + bool avx512f; + bool avx512pf; + bool avx512er; + bool avx512cd; + bool avx512dq; + bool avx512bw; + bool avx512vl; + bool avx512ifma; + bool avx512vbmi; + bool avx512vbmi2; + bool avx512bitalg; + bool avx512vpopcntdq; + bool avx512vnni; + bool avx512bf16; + bool avx512vp2intersect; + bool avx512_4vnniw; + bool avx512_4fmaps; + bool hle; + bool rtm; + bool xtest; + bool mpx; + #if CPUINFO_ARCH_X86 + bool cmov; + bool cmpxchg8b; + #endif + bool cmpxchg16b; + bool clwb; + bool movbe; + #if CPUINFO_ARCH_X86_64 + bool lahf_sahf; + #endif + bool fs_gs_base; + bool lzcnt; + bool popcnt; + bool tbm; + bool bmi; + bool bmi2; + bool adx; + bool aes; + bool vaes; + bool pclmulqdq; + bool vpclmulqdq; + bool gfni; + bool rdrand; + bool rdseed; + bool sha; + bool rng; + bool ace; + bool ace2; + bool phe; + bool pmm; + bool lwp; + }; + + extern struct cpuinfo_x86_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_x86_rdtsc(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.rdtsc; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_rdtscp(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdtscp; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_rdpid(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdpid; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_clzero(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clzero; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_mwait(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwait; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_mwaitx(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mwaitx; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_fxsave(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fxsave; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_xsave(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xsave; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_fpu(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.fpu; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_mmx(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.mmx; + #endif + #else + return false; + #endif +} + 
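A minimal sketch of how a consumer might drive the feature helpers above (assumes the cpuinfo target added by this change is linked; the printed path names and the dispatch choice are illustrative, not part of the library):

#include <stdio.h>
#include <cpuinfo.h>

int main(void) {
    /* cpuinfo_initialize() populates the global tables (including cpuinfo_isa);
     * every cpuinfo_* query below expects it to have succeeded. */
    if (!cpuinfo_initialize()) {
        fprintf(stderr, "cpuinfo_initialize() failed\n");
        return 1;
    }

    /* The helpers return a compile-time constant where the target ISA already
     * guarantees the feature (e.g. SSE2 on x86-64) and consult cpuinfo_isa
     * otherwise, so runtime dispatch stays cheap. */
    if (cpuinfo_has_x86_avx2()) {
        printf("using AVX2 path\n");
    } else if (cpuinfo_has_x86_sse4_1()) {
        printf("using SSE4.1 path\n");
    } else {
        printf("using scalar fallback\n");
    }

    cpuinfo_deinitialize();
    return 0;
}

On non-x86 targets these helpers reduce to `return false`, so the same dispatch code builds unchanged everywhere.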
+static inline bool cpuinfo_has_x86_mmx_plus(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.mmx_plus; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_3dnow(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_3dnow_plus(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.three_d_now_plus; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_3dnow_geode(void) { + #if CPUINFO_ARCH_X86_64 + return false; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return false; + #else + return cpuinfo_isa.three_d_now_geode; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_prefetch(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetch; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_prefetchw(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchw; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_prefetchwt1(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.prefetchwt1; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_daz(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.daz; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sse(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.sse; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sse2(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.sse2; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sse3(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.sse3; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_ssse3(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.ssse3; + #endif + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sse4_1(void) { + #if CPUINFO_ARCH_X86_64 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.sse4_1; + #endif + #elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_1; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sse4_2(void) { + #if CPUINFO_ARCH_X86_64 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.sse4_2; + #endif + #elif CPUINFO_ARCH_X86 + return cpuinfo_isa.sse4_2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sse4a(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sse4a; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_misaligned_sse(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.misaligned_sse; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx; + #else + return false; + #endif +} + +static inline bool 
cpuinfo_has_x86_fma3(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma3; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_fma4(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.fma4; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_xop(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xop; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_f16c(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.f16c; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx2(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512f(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512f; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512pf(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512pf; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512er(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512er; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512cd(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512cd; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512dq(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512dq; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512bw(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bw; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vl(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vl; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512ifma(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512ifma; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vbmi2(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vbmi2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512bitalg(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bitalg; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vpopcntdq(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vpopcntdq; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vnni(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vnni; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512bf16(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512bf16; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512vp2intersect; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_avx512_4vnniw(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4vnniw; + #else + return false; + #endif +} + +static inline 
bool cpuinfo_has_x86_avx512_4fmaps(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx512_4fmaps; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_hle(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.hle; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_rtm(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rtm; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_xtest(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.xtest; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_mpx(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.mpx; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_cmov(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmov; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_cmpxchg8b(void) { + #if CPUINFO_ARCH_X86_64 + return true; + #elif CPUINFO_ARCH_X86 + return cpuinfo_isa.cmpxchg8b; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_cmpxchg16b(void) { + #if CPUINFO_ARCH_X86_64 + return cpuinfo_isa.cmpxchg16b; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_clwb(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.clwb; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_movbe(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.movbe; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_lahf_sahf(void) { + #if CPUINFO_ARCH_X86 + return true; + #elif CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lahf_sahf; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_lzcnt(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.lzcnt; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_popcnt(void) { + #if CPUINFO_ARCH_X86_64 + #if defined(__ANDROID__) + return true; + #else + return cpuinfo_isa.popcnt; + #endif + #elif CPUINFO_ARCH_X86 + return cpuinfo_isa.popcnt; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_tbm(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.tbm; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_bmi(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_bmi2(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.bmi2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_adx(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.adx; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_aes(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.aes; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_vaes(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vaes; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_pclmulqdq(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.pclmulqdq; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_vpclmulqdq(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.vpclmulqdq; + #else + return false; + #endif +} + +static 
inline bool cpuinfo_has_x86_gfni(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.gfni; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_rdrand(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdrand; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_rdseed(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.rdseed; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_x86_sha(void) { + #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.sha; + #else + return false; + #endif +} + +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + /* This structure is not a part of stable API. Use cpuinfo_has_arm_* functions instead. */ + struct cpuinfo_arm_isa { + #if CPUINFO_ARCH_ARM + bool thumb; + bool thumb2; + bool thumbee; + bool jazelle; + bool armv5e; + bool armv6; + bool armv6k; + bool armv7; + bool armv7mp; + bool armv8; + bool idiv; + + bool vfpv2; + bool vfpv3; + bool d32; + bool fp16; + bool fma; + + bool wmmx; + bool wmmx2; + bool neon; + #endif + #if CPUINFO_ARCH_ARM64 + bool atomics; + bool sve; + bool sve2; + #endif + bool rdm; + bool fp16arith; + bool dot; + bool jscvt; + bool fcma; + + bool aes; + bool sha1; + bool sha2; + bool pmull; + bool crc32; + }; + + extern struct cpuinfo_arm_isa cpuinfo_isa; +#endif + +static inline bool cpuinfo_has_arm_thumb(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_thumb2(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.thumb2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_v5e(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv5e; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_v6(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_v6k(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv6k; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_v7(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_v7mp(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.armv7mp; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_v8(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.armv8; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_idiv(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.idiv; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv2(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv3(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv3_d32(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.d32; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv3_fp16_d32(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif 
CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fp16 && cpuinfo_isa.d32; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv4(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_vfpv4_d32(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.vfpv3 && cpuinfo_isa.fma && cpuinfo_isa.d32; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_wmmx(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_wmmx2(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.wmmx2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon_fp16(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon_fma(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fma; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon_v8(void) { + #if CPUINFO_ARCH_ARM64 + return true; + #elif CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.armv8; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_atomics(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.atomics; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon_rdm(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.rdm; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon_fp16_arith(void) { + #if CPUINFO_ARCH_ARM + return cpuinfo_isa.neon && cpuinfo_isa.fp16arith; + #elif CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_fp16_arith(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fp16arith; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_neon_dot(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.dot; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_jscvt(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.jscvt; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_fcma(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.fcma; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_aes(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.aes; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_sha1(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha1; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_sha2(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sha2; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_pmull(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_isa.pmull; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_crc32(void) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return 
cpuinfo_isa.crc32; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_sve(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve; + #else + return false; + #endif +} + +static inline bool cpuinfo_has_arm_sve2(void) { + #if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sve2; + #else + return false; + #endif +} + +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void); +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void); +const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void); +const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_packages(void); +const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarchs(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_caches(void); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void); + +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processor(uint32_t index); +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_core(uint32_t index); +const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_cluster(uint32_t index); +const struct cpuinfo_package* CPUINFO_ABI cpuinfo_get_package(uint32_t index); +const struct cpuinfo_uarch_info* CPUINFO_ABI cpuinfo_get_uarch(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index); +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index); + +uint32_t CPUINFO_ABI cpuinfo_get_processors_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_cores_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_clusters_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_packages_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_uarchs_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void); +uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void); + +/** + * Returns upper bound on cache size. + */ +uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void); + +/** + * Identify the logical processor that executes the current thread. + * + * There is no guarantee that the thread will stay on the same logical processor for any time. + * Callers should treat the result as only a hint, and be prepared to handle NULL return value. + */ +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void); + +/** + * Identify the core that executes the current thread. + * + * There is no guarantee that the thread will stay on the same core for any time. + * Callers should treat the result as only a hint, and be prepared to handle NULL return value. + */ +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void); + +/** + * Identify the microarchitecture index of the core that executes the current thread. + * If the system does not support such identification, the function returns 0. + * + * There is no guarantee that the thread will stay on the same type of core for any time. + * Callers should treat the result as only a hint. 
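 *
 * A possible usage sketch (illustrative names, not part of this API): callers that keep
 * one kernel variant per microarchitecture, ordered to match the entries returned by
 * cpuinfo_get_uarchs(), can dispatch on the returned index, e.g.:
 *
 *   uint32_t index = cpuinfo_get_current_uarch_index();
 *   if (index >= cpuinfo_get_uarchs_count())
 *       index = 0;                      // fall back to the first variant
 *   my_kernel_table[index](arguments);  // my_kernel_table is an assumed caller-side array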
+ */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void); + +/** + * Identify the microarchitecture index of the core that executes the current thread. + * If the system does not support such identification, the function returns the user-specified default value. + * + * There is no guarantee that the thread will stay on the same type of core for any time. + * Callers should treat the result as only a hint. + */ +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* CPUINFO_H */ diff --git a/dep/cpuinfo/src/api.c b/dep/cpuinfo/src/api.c new file mode 100644 index 000000000..f91b421cc --- /dev/null +++ b/dep/cpuinfo/src/api.c @@ -0,0 +1,410 @@ +#include +#include + +#include +#include +#include + +#ifdef __linux__ + #include + + #include + #include + #if !defined(__NR_getcpu) + #include + #endif +#endif + +bool cpuinfo_is_initialized = false; + +struct cpuinfo_processor* cpuinfo_processors = NULL; +struct cpuinfo_core* cpuinfo_cores = NULL; +struct cpuinfo_cluster* cpuinfo_clusters = NULL; +struct cpuinfo_package* cpuinfo_packages = NULL; +struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max] = { NULL }; + +uint32_t cpuinfo_processors_count = 0; +uint32_t cpuinfo_cores_count = 0; +uint32_t cpuinfo_clusters_count = 0; +uint32_t cpuinfo_packages_count = 0; +uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 }; +uint32_t cpuinfo_max_cache_size = 0; + +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + struct cpuinfo_uarch_info* cpuinfo_uarchs = NULL; + uint32_t cpuinfo_uarchs_count = 0; +#else + struct cpuinfo_uarch_info cpuinfo_global_uarch = { cpuinfo_uarch_unknown }; +#endif + +#ifdef __linux__ + uint32_t cpuinfo_linux_cpu_max = 0; + const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map = NULL; + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map = NULL; + #endif +#endif + + +const struct cpuinfo_processor* cpuinfo_get_processors(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors"); + } + return cpuinfo_processors; +} + +const struct cpuinfo_core* cpuinfo_get_cores(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); + } + return cpuinfo_cores; +} + +const struct cpuinfo_cluster* cpuinfo_get_clusters(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters"); + } + return cpuinfo_clusters; +} + +const struct cpuinfo_package* cpuinfo_get_packages(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages"); + } + return cpuinfo_packages; +} + +const struct cpuinfo_uarch_info* cpuinfo_get_uarchs() { + if (!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_uarchs; + #else + return &cpuinfo_global_uarch; + #endif +} + +const struct cpuinfo_processor* cpuinfo_get_processor(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processor"); + } + if CPUINFO_UNLIKELY(index >= 
cpuinfo_processors_count) { + return NULL; + } + return &cpuinfo_processors[index]; +} + +const struct cpuinfo_core* cpuinfo_get_core(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "core"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_cores_count) { + return NULL; + } + return &cpuinfo_cores[index]; +} + +const struct cpuinfo_cluster* cpuinfo_get_cluster(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cluster"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_clusters_count) { + return NULL; + } + return &cpuinfo_clusters[index]; +} + +const struct cpuinfo_package* cpuinfo_get_package(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "package"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_packages_count) { + return NULL; + } + return &cpuinfo_packages[index]; +} + +const struct cpuinfo_uarch_info* cpuinfo_get_uarch(uint32_t index) { + if (!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarch"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + if CPUINFO_UNLIKELY(index >= cpuinfo_uarchs_count) { + return NULL; + } + return &cpuinfo_uarchs[index]; + #else + if CPUINFO_UNLIKELY(index != 0) { + return NULL; + } + return &cpuinfo_global_uarch; + #endif +} + +uint32_t cpuinfo_get_processors_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "processors_count"); + } + return cpuinfo_processors_count; +} + +uint32_t cpuinfo_get_cores_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "cores_count"); + } + return cpuinfo_cores_count; +} + +uint32_t cpuinfo_get_clusters_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "clusters_count"); + } + return cpuinfo_clusters_count; +} + +uint32_t cpuinfo_get_packages_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "packages_count"); + } + return cpuinfo_packages_count; +} + +uint32_t cpuinfo_get_uarchs_count(void) { + if (!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "uarchs_count"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + return cpuinfo_uarchs_count; + #else + return 1; + #endif +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_caches(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches"); + } + return cpuinfo_cache[cpuinfo_cache_level_1i]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_caches(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches"); + } + return cpuinfo_cache[cpuinfo_cache_level_1d]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_caches(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches"); + } + return cpuinfo_cache[cpuinfo_cache_level_2]; +} + +const struct cpuinfo_cache* CPUINFO_ABI 
cpuinfo_get_l3_caches(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches"); + } + return cpuinfo_cache[cpuinfo_cache_level_3]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_caches(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches"); + } + return cpuinfo_cache[cpuinfo_cache_level_4]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1i_cache(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_cache"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1i]) { + return NULL; + } + return &cpuinfo_cache[cpuinfo_cache_level_1i][index]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l1d_cache(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_cache"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_1d]) { + return NULL; + } + return &cpuinfo_cache[cpuinfo_cache_level_1d][index]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l2_cache(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_cache"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_2]) { + return NULL; + } + return &cpuinfo_cache[cpuinfo_cache_level_2][index]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l3_cache(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_cache"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_3]) { + return NULL; + } + return &cpuinfo_cache[cpuinfo_cache_level_3][index]; +} + +const struct cpuinfo_cache* CPUINFO_ABI cpuinfo_get_l4_cache(uint32_t index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_cache"); + } + if CPUINFO_UNLIKELY(index >= cpuinfo_cache_count[cpuinfo_cache_level_4]) { + return NULL; + } + return &cpuinfo_cache[cpuinfo_cache_level_4][index]; +} + +uint32_t CPUINFO_ABI cpuinfo_get_l1i_caches_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1i_caches_count"); + } + return cpuinfo_cache_count[cpuinfo_cache_level_1i]; +} + +uint32_t CPUINFO_ABI cpuinfo_get_l1d_caches_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l1d_caches_count"); + } + return cpuinfo_cache_count[cpuinfo_cache_level_1d]; +} + +uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l2_caches_count"); + } + return cpuinfo_cache_count[cpuinfo_cache_level_2]; +} + +uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l3_caches_count"); + } + return cpuinfo_cache_count[cpuinfo_cache_level_3]; +} + +uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + 
cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "l4_caches_count"); + } + return cpuinfo_cache_count[cpuinfo_cache_level_4]; +} + +uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "max_cache_size"); + } + return cpuinfo_max_cache_size; +} + +const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_processor"); + } + #ifdef __linux__ + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return 0; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return 0; + } + return cpuinfo_linux_cpu_to_processor_map[cpu]; + #else + return NULL; + #endif +} + +const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_core"); + } + #ifdef __linux__ + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return 0; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return 0; + } + return cpuinfo_linux_cpu_to_core_map[cpu]; + #else + return NULL; + #endif +} + +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index(void) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #ifdef __linux__ + if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { + /* Special case: avoid syscall on systems with only a single type of cores */ + return 0; + } + + /* General case */ + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return 0; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return 0; + } + return cpuinfo_linux_cpu_to_uarch_index_map[cpu]; + #else + /* Fallback: pretend to be on the big core. */ + return 0; + #endif + #else + /* Only ARM/ARM64 processors may include cores of different types in the same package. */ + return 0; + #endif +} + +uint32_t CPUINFO_ABI cpuinfo_get_current_uarch_index_with_default(uint32_t default_uarch_index) { + if CPUINFO_UNLIKELY(!cpuinfo_is_initialized) { + cpuinfo_log_fatal("cpuinfo_get_%s called before cpuinfo is initialized", "current_uarch_index_with_default"); + } + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #ifdef __linux__ + if (cpuinfo_linux_cpu_to_uarch_index_map == NULL) { + /* Special case: avoid syscall on systems with only a single type of cores */ + return 0; + } + + /* General case */ + /* Initializing this variable silences a MemorySanitizer error. */ + unsigned cpu = 0; + if CPUINFO_UNLIKELY(syscall(__NR_getcpu, &cpu, NULL, NULL) != 0) { + return default_uarch_index; + } + if CPUINFO_UNLIKELY((uint32_t) cpu >= cpuinfo_linux_cpu_max) { + return default_uarch_index; + } + return cpuinfo_linux_cpu_to_uarch_index_map[cpu]; + #else + /* Fallback: no API to query current core, use default uarch index. 
*/ + return default_uarch_index; + #endif + #else + /* Only ARM/ARM64 processors may include cores of different types in the same package. */ + return 0; + #endif +} diff --git a/dep/cpuinfo/src/arm/android/api.h b/dep/cpuinfo/src/arm/android/api.h new file mode 100644 index 000000000..228632ace --- /dev/null +++ b/dep/cpuinfo/src/arm/android/api.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include +#include + +enum cpuinfo_android_chipset_property { + cpuinfo_android_chipset_property_proc_cpuinfo_hardware = 0, + cpuinfo_android_chipset_property_ro_product_board, + cpuinfo_android_chipset_property_ro_board_platform, + cpuinfo_android_chipset_property_ro_mediatek_platform, + cpuinfo_android_chipset_property_ro_arch, + cpuinfo_android_chipset_property_ro_chipname, + cpuinfo_android_chipset_property_ro_hardware_chipname, + cpuinfo_android_chipset_property_max, +}; + +CPUINFO_INTERNAL void cpuinfo_arm_android_parse_properties( + struct cpuinfo_android_properties properties[restrict static 1]); diff --git a/dep/cpuinfo/src/arm/android/properties.c b/dep/cpuinfo/src/arm/android/properties.c new file mode 100644 index 000000000..5f93889d7 --- /dev/null +++ b/dep/cpuinfo/src/arm/android/properties.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#if CPUINFO_MOCK + #include + + static struct cpuinfo_mock_property* cpuinfo_mock_properties = NULL; + + void CPUINFO_ABI cpuinfo_mock_android_properties(struct cpuinfo_mock_property* properties) { + cpuinfo_log_info("Android properties mocking enabled"); + cpuinfo_mock_properties = properties; + } + + static int cpuinfo_android_property_get(const char* key, char* value) { + if (cpuinfo_mock_properties != NULL) { + for (const struct cpuinfo_mock_property* prop = cpuinfo_mock_properties; prop->key != NULL; prop++) { + if (strncmp(key, prop->key, CPUINFO_BUILD_PROP_NAME_MAX) == 0) { + strncpy(value, prop->value, CPUINFO_BUILD_PROP_VALUE_MAX); + return (int) strnlen(prop->value, CPUINFO_BUILD_PROP_VALUE_MAX); + } + } + } + *value = '\0'; + return 0; + } +#else + static inline int cpuinfo_android_property_get(const char* key, char* value) { + return __system_property_get(key, value); + } +#endif + +void cpuinfo_arm_android_parse_properties(struct cpuinfo_android_properties properties[restrict static 1]) { + const int ro_product_board_length = + cpuinfo_android_property_get("ro.product.board", properties->ro_product_board); + cpuinfo_log_debug("read ro.product.board = \"%.*s\"", ro_product_board_length, properties->ro_product_board); + + const int ro_board_platform_length = + cpuinfo_android_property_get("ro.board.platform", properties->ro_board_platform); + cpuinfo_log_debug("read ro.board.platform = \"%.*s\"", ro_board_platform_length, properties->ro_board_platform); + + const int ro_mediatek_platform_length = + cpuinfo_android_property_get("ro.mediatek.platform", properties->ro_mediatek_platform); + cpuinfo_log_debug("read ro.mediatek.platform = \"%.*s\"", + ro_mediatek_platform_length, properties->ro_mediatek_platform); + + const int ro_arch_length = + cpuinfo_android_property_get("ro.arch", properties->ro_arch); + cpuinfo_log_debug("read ro.arch = \"%.*s\"", ro_arch_length, properties->ro_arch); + + const int ro_chipname_length = + cpuinfo_android_property_get("ro.chipname", properties->ro_chipname); + cpuinfo_log_debug("read ro.chipname = \"%.*s\"", ro_chipname_length, properties->ro_chipname); + + const int ro_hardware_chipname_length = + 
cpuinfo_android_property_get("ro.hardware.chipname", properties->ro_hardware_chipname); + cpuinfo_log_debug("read ro.hardware.chipname = \"%.*s\"", ro_hardware_chipname_length, properties->ro_hardware_chipname); +} diff --git a/dep/cpuinfo/src/arm/api.h b/dep/cpuinfo/src/arm/api.h new file mode 100644 index 000000000..2724df6da --- /dev/null +++ b/dep/cpuinfo/src/arm/api.h @@ -0,0 +1,154 @@ +#pragma once + +#include +#include + +#include +#include + +enum cpuinfo_arm_chipset_vendor { + cpuinfo_arm_chipset_vendor_unknown = 0, + cpuinfo_arm_chipset_vendor_qualcomm, + cpuinfo_arm_chipset_vendor_mediatek, + cpuinfo_arm_chipset_vendor_samsung, + cpuinfo_arm_chipset_vendor_hisilicon, + cpuinfo_arm_chipset_vendor_actions, + cpuinfo_arm_chipset_vendor_allwinner, + cpuinfo_arm_chipset_vendor_amlogic, + cpuinfo_arm_chipset_vendor_broadcom, + cpuinfo_arm_chipset_vendor_lg, + cpuinfo_arm_chipset_vendor_leadcore, + cpuinfo_arm_chipset_vendor_marvell, + cpuinfo_arm_chipset_vendor_mstar, + cpuinfo_arm_chipset_vendor_novathor, + cpuinfo_arm_chipset_vendor_nvidia, + cpuinfo_arm_chipset_vendor_pinecone, + cpuinfo_arm_chipset_vendor_renesas, + cpuinfo_arm_chipset_vendor_rockchip, + cpuinfo_arm_chipset_vendor_spreadtrum, + cpuinfo_arm_chipset_vendor_telechips, + cpuinfo_arm_chipset_vendor_texas_instruments, + cpuinfo_arm_chipset_vendor_wondermedia, + cpuinfo_arm_chipset_vendor_max, +}; + +enum cpuinfo_arm_chipset_series { + cpuinfo_arm_chipset_series_unknown = 0, + cpuinfo_arm_chipset_series_qualcomm_qsd, + cpuinfo_arm_chipset_series_qualcomm_msm, + cpuinfo_arm_chipset_series_qualcomm_apq, + cpuinfo_arm_chipset_series_qualcomm_snapdragon, + cpuinfo_arm_chipset_series_mediatek_mt, + cpuinfo_arm_chipset_series_samsung_exynos, + cpuinfo_arm_chipset_series_hisilicon_k3v, + cpuinfo_arm_chipset_series_hisilicon_hi, + cpuinfo_arm_chipset_series_hisilicon_kirin, + cpuinfo_arm_chipset_series_actions_atm, + cpuinfo_arm_chipset_series_allwinner_a, + cpuinfo_arm_chipset_series_amlogic_aml, + cpuinfo_arm_chipset_series_amlogic_s, + cpuinfo_arm_chipset_series_broadcom_bcm, + cpuinfo_arm_chipset_series_lg_nuclun, + cpuinfo_arm_chipset_series_leadcore_lc, + cpuinfo_arm_chipset_series_marvell_pxa, + cpuinfo_arm_chipset_series_mstar_6a, + cpuinfo_arm_chipset_series_novathor_u, + cpuinfo_arm_chipset_series_nvidia_tegra_t, + cpuinfo_arm_chipset_series_nvidia_tegra_ap, + cpuinfo_arm_chipset_series_nvidia_tegra_sl, + cpuinfo_arm_chipset_series_pinecone_surge_s, + cpuinfo_arm_chipset_series_renesas_mp, + cpuinfo_arm_chipset_series_rockchip_rk, + cpuinfo_arm_chipset_series_spreadtrum_sc, + cpuinfo_arm_chipset_series_telechips_tcc, + cpuinfo_arm_chipset_series_texas_instruments_omap, + cpuinfo_arm_chipset_series_wondermedia_wm, + cpuinfo_arm_chipset_series_max, +}; + +#define CPUINFO_ARM_CHIPSET_SUFFIX_MAX 8 + +struct cpuinfo_arm_chipset { + enum cpuinfo_arm_chipset_vendor vendor; + enum cpuinfo_arm_chipset_series series; + uint32_t model; + char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX]; +}; + +#define CPUINFO_ARM_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX + +#ifndef __cplusplus +#ifndef _MSC_VER + CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string( + const struct cpuinfo_arm_chipset chipset[restrict static 1], + char name[restrict static CPUINFO_ARM_CHIPSET_NAME_MAX]); + + CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset( + struct cpuinfo_arm_chipset chipset[restrict static 1], uint32_t cores, uint32_t max_cpu_freq_max); + + CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch( + uint32_t midr, + #if CPUINFO_ARCH_ARM + bool has_vfpv4, 
+ #endif + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]); + + CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[restrict static 1], + uint32_t cluster_id, + uint32_t arch_version, + struct cpuinfo_cache l1i[restrict static 1], + struct cpuinfo_cache l1d[restrict static 1], + struct cpuinfo_cache l2[restrict static 1], + struct cpuinfo_cache l3[restrict static 1]); + + CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size( + const struct cpuinfo_processor processor[1]); +#else + CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string( + const struct cpuinfo_arm_chipset chipset[1], + char name[CPUINFO_ARM_CHIPSET_NAME_MAX]); + + CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset( + struct cpuinfo_arm_chipset chipset[1], uint32_t cores, uint32_t max_cpu_freq_max); + + CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch( + uint32_t midr, + #if CPUINFO_ARCH_ARM + bool has_vfpv4, + #endif + enum cpuinfo_vendor vendor[1], + enum cpuinfo_uarch uarch[1]); + + CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[1], + uint32_t cluster_id, + uint32_t arch_version, + struct cpuinfo_cache l1i[1], + struct cpuinfo_cache l1d[1], + struct cpuinfo_cache l2[1], + struct cpuinfo_cache l3[1]); + + CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size( + const struct cpuinfo_processor processor[1]); +#endif +#else /* defined(__cplusplus) */ + CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[1], + uint32_t cluster_id, + uint32_t arch_version, + struct cpuinfo_cache l1i[1], + struct cpuinfo_cache l1d[1], + struct cpuinfo_cache l2[1], + struct cpuinfo_cache l3[1]); +#endif diff --git a/dep/cpuinfo/src/arm/cache.c b/dep/cpuinfo/src/arm/cache.c new file mode 100644 index 000000000..af29ca682 --- /dev/null +++ b/dep/cpuinfo/src/arm/cache.c @@ -0,0 +1,1687 @@ +#include + +#include +#include +#include +#include +#include + + +void cpuinfo_arm_decode_cache( + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t midr, +#ifndef _MSC_VER + const struct cpuinfo_arm_chipset chipset[restrict static 1], +#else + const struct cpuinfo_arm_chipset chipset[1], +#endif + uint32_t cluster_id, + uint32_t arch_version, +#ifndef _MSC_VER + struct cpuinfo_cache l1i[restrict static 1], + struct cpuinfo_cache l1d[restrict static 1], + struct cpuinfo_cache l2[restrict static 1], + struct cpuinfo_cache l3[restrict static 1]) +#else + struct cpuinfo_cache l1i[1], + struct cpuinfo_cache l1d[1], + struct cpuinfo_cache l2[1], + struct cpuinfo_cache l3[1]) +#endif +{ + switch (uarch) { +#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) + case cpuinfo_uarch_xscale: + switch (midr_get_part(midr) >> 8) { + case 2: + /* + * PXA 210/25X/26X + * + * See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface" + * by David A. Patterson, John L. 
Hennessy + */ + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 32, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 + }; + break; + case 4: + /* PXA 27X */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 32, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 32, + .line_size = 32 + }; + break; + case 6: + /* + * PXA 3XX + * + * See http://download.intel.com/design/intelxscale/31628302.pdf + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .line_size = 32 + }; + break; + } + break; + case cpuinfo_uarch_arm11: + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + break; +#endif /* CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) */ +#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) + case cpuinfo_uarch_cortex_a5: + /* + * Cortex-A5 Technical Reference Manual: + * 7.1.1. Memory system + * The Cortex-A5 processor has separate instruction and data caches. + * The caches have the following features: + * - Data cache is 4-way set-associative. + * - Instruction cache is 2-way set-associative. + * - The cache line length is eight words. + * - You can configure the instruction and data caches independently during implementation + * to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB. + * 1.1.3. System design components + * PrimeCell Level 2 Cache Controller (PL310) + * The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a + * recognized method of improving the performance of ARM-based systems when significant memory traffic + * is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external + * memory accesses and has been optimized for use with the Cortex-A5 processor. + * 8.1.7. Exclusive L2 cache + * The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode. + * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. + * + * +--------------------+-----------+-----------+----------+-----------+ + * | Processor model | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-----------+-----------+----------+-----------+ + * | Qualcomm MSM7225A | | | | | + * | Qualcomm MSM7625A | | | | | + * | Qualcomm MSM7227A | | | | | + * | Qualcomm MSM7627A | 32K | 32K | 256K | Wiki [1] | + * | Qualcomm MSM7225AB | | | | | + * | Qualcomm MSM7225AB | | | | | + * | Qualcomm QSD8250 | | | | | + * | Qualcomm QSD8650 | | | | | + * +--------------------+-----------+-----------+----------+-----------+ + * | Spreadtrum SC6821 | 32K | 32K | ? | | + * | Spreadtrum SC6825 | 32K | 32K | 256K | Wiki [2] | + * | Spreadtrum SC8810 | ? | ? | ? | | + * | Spreadtrum SC8825 | 32K | 32K | ? 
| | + * +--------------------+-----------+-----------+----------+-----------+ + * + * [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1 + * [2] https://en.wikipedia.org/wiki/Spreadtrum + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = 256 * 1024, + /* + * Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size" + * Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf + */ + .associativity = 8, + .line_size = 32 + }; + break; + case cpuinfo_uarch_cortex_a7: + /* + * Cortex-A7 MPCore Technical Reference Manual: + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. You can configure the + * instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB. + * + * The L1 instruction memory system has the following features: + * - Instruction side cache line length of 32-bytes. + * - 2-way set-associative instruction cache. + * + * The L1 data memory system has the following features: + * - Data side cache line length of 64-bytes. + * - 4-way set-associative data cache. + * + * 7.1. About the L2 Memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB. + * - Fixed line length of 64 bytes + * - 8-way set-associative cache structure + * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Allwinner A20 | 2 | 32K | 32K | 256K | [1] | + * | Allwinner A23 | 2 | 32K | 32K | 256K | [2] | + * | Allwinner A31 | 4 | 32K | 32K | 1M | [3] | + * | Allwinner A31s | 4 | 32K | 32K | 1M | [4] | + * | Allwinner A33 | 4 | 32K | 32K | 512K | [5] | + * | Allwinner A80 Octa | 4(+4) | 32K | 32K | 512K(+2M) | [6] | + * | Allwinner A81T | 8 | 32K | 32K | 1M | [7] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Broadcom BCM2836 | 4 | 32K | 32K | 512K | [8] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Kirin 920 | 4(+4) | ? | ? 
| 512K | [9] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] https://linux-sunxi.org/A20 + * [2] https://linux-sunxi.org/A23 + * [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf + * [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf + * [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf + * [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf + * [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf + * [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428 + * [9] http://www.gizmochina.com/2014/10/07/hisilicon-kirin-920-tear-down/ + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 128 * 1024 * cluster_cores, + .associativity = 8, + .line_size = 64 + }; + break; + case cpuinfo_uarch_cortex_a8: + /* + * Cortex-A8 Technical Reference Manual: + * 7.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches in a Harvard arrangement. + * The L1 memory system provides the core with: + * - fixed line length of 64 bytes + * - support for 16KB or 32KB caches + * - 4-way set associative cache structure + * 8.1. About the L2 memory system + * The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache. + * The key features of the L2 memory system include: + * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB + * - fixed line length of 64 bytes + * - 8-way set associative cache structure + * + * +----------------------+-----------+-----------+-----------+-----------+ + * | Processor model | L1D cache | L1I cache | L2 cache | Reference | + * +----------------------+-----------+-----------+-----------+-----------+ + * | Exynos 3 Single 3110 | 32K | 32K | 512K | [1] | + * +----------------------+-----------+-----------+-----------+-----------+ + * | TI DM 3730 | 32K | 32K | 256K | [2] | + * +----------------------+-----------+-----------+-----------+-----------+ + * + * [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf + * [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .associativity = 8, + .line_size = 64 + }; + switch (chipset->vendor) { + case cpuinfo_arm_chipset_vendor_samsung: + l2->size = 512 * 1024; + break; + default: + l2->size = 256 * 1024; + break; + } + + break; + case cpuinfo_uarch_cortex_a9: + /* + * ARM Cortex‑A9 Technical Reference Manual: + * 7.1.1 Memory system + * The Cortex‑A9 processor has separate instruction and data caches. + * The caches have the following features: + * - Both caches are 4-way set-associative. + * - The cache line length is eight words. + * - You can configure the instruction and data caches independently during implementation + * to sizes of 16KB, 32KB, or 64KB. + * 8.1.5 Exclusive L2 cache + * The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode. + * This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller. 
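+	 * (The eight-word cache line quoted in 7.1.1 corresponds to 8 * 4 = 32 bytes,
+	 * which matches the line_size of 32 used for the L1 and L2 entries below.)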
+ * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Exynos 4 Dual 4210 | 2 | 32K | 32K | 1M | [1] | + * | Exynos 4 Dual 4212 | 2 | 32K | 32K | 1M | [2] | + * | Exynos 4 Quad 4412 | 4 | 32K | 32K | 1M | [3] | + * | Exynos 4 Quad 4415 | 4 | 32K | 32K | 1M | | + * | TI OMAP 4430 | 2 | 32K | 32K | 1M | [4] | + * | TI OMAP 4460 | 2 | 32K | 32K | 1M | [5] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf + * [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf + * [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf + * [4] https://www.hotchips.org/wp-content/uploads/hc_archives/hc21/2_mon/HC21.24.400.ClientProcessors-Epub/HC21.24.421.Witt-OMAP4430.pdf + * [5] http://www.anandtech.com/show/5310/samsung-galaxy-nexus-ice-cream-sandwich-review/9 + */ + + /* Use Exynos 4 specs */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = 1024 * 1024, + /* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */ + .associativity = 16, + .line_size = 32 + }; + break; + case cpuinfo_uarch_cortex_a15: + /* + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. + * The L1 instruction memory system has the following features: + * - 32KB 2-way set-associative instruction cache. + * - Fixed line length of 64 bytes. + * The L1 data memory system has the following features: + * - 32KB 2-way set-associative data cache. + * - Fixed line length of 64 bytes. + * 7.1. About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + * - Fixed line length of 64 bytes. + * - 16-way set-associative cache structure. + * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Exynos 5 Dual 5250 | 2 | 32K | 32K | 1M | [1] | + * | Exynos 5 Hexa 5260 | 2(+4) | 32K | 32K | 1M(+512K) | [2] | + * | Exynos 5 Octa 5410 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5420 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5422 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5430 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Exynos 5 Octa 5800 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | + * | Kirin 920 | 4(+4) | ? | ? 
| 2M(+512K) | [4] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf + * [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf + * [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13 + * [4] http://www.gizmochina.com/2014/10/07/hisilicon-kirin-920-tear-down/ + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 512 * 1024, + .associativity = 16, + .line_size = 64 + }; + break; + case cpuinfo_uarch_cortex_a17: + /* + * ARM Cortex-A17 MPCore Processor Technical Reference Manual: + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. + * The size of the instruction cache is implemented as either 32KB or 64KB. + * The size of the data cache is 32KB. + * + * The L1 instruction cache has the following features: + * - Instruction side cache line length of 64-bytes. + * - 4-way set-associative instruction cache. + * + * The L1 data cache has the following features: + * - Data side cache line length of 64-bytes. + * - 4-way set-associative data cache. + * + * 7.1. About the L2 Memory system + * An integrated L2 cache: + * - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB. + * - A fixed line length of 64 bytes. + * - 16-way set-associative cache structure. + * + * +------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +------------------+-------+-----------+-----------+-----------+-----------+ + * | MediaTek MT6595 | 4(+4) | 32K | 32K | 2M(+512K) | [1] | + * +------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] https://blog.osakana.net/archives/5268 + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 512 * 1024, + .associativity = 16, + .line_size = 64 + }; + break; +#endif /* CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) */ + case cpuinfo_uarch_cortex_a35: + /* + * ARM Cortex‑A35 Processor Technical Reference Manual: + * 6.1. About the L1 memory system + * The L1 memory system includes several power-saving and performance-enhancing features. + * These include separate instruction and data caches, which can be configured + * independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB. + * + * L1 instruction-side memory system + * A dedicated instruction cache that: + * - is virtually indexed and physically tagged. + * - is 2-way set associative. + * - is configurable to be 8KB, 16KB, 32KB, or 64KB. + * - uses a cache line length of 64 bytes. + * + * L1 data-side memory system + * A dedicated data cache that: + * - is physically indexed and physically tagged. + * - is 4-way set associative. + * - is configurable to be 8KB, 16KB, 32KB, or 64KB. + * - uses a cache line length of 64 bytes. + * + * 7.1. About the L2 memory system + * The L2 cache is 8-way set associative. 
+ * Further features of the L2 cache are: + * - Configurable size of 128KB, 256KB, 512KB, and 1MB. + * - Fixed line length of 64 bytes. + * - Physically indexed and tagged. + * + * +-----------------+---------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +-----------------+---------+-----------+-----------+-----------+-----------+ + * | MediaTek MT6599 | 4(+4+2) | ? | ? | ? | | + * +-----------------+---------+-----------+-----------+-----------+-----------+ + */ + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, /* assumption based on low-end Cortex-A53 */ + .associativity = 2, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, /* assumption based on low-end Cortex-A53 */ + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 256 * 1024, /* assumption based on low-end Cortex-A53 */ + .associativity = 8, + .line_size = 64 + }; + break; + case cpuinfo_uarch_cortex_a53: + /* + * ARM Cortex-A53 MPCore Processor Technical Reference Manual: + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. The implementer configures the + * instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB. + * + * The L1 Instruction memory system has the following key features: + * - Instruction side cache line length of 64 bytes. + * - 2-way set associative L1 Instruction cache. + * + * The L1 Data memory system has the following features: + * - Data side cache line length of 64 bytes. + * - 4-way set associative L1 Data cache. + * + * 7.1. About the L2 memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB. + * - Fixed line length of 64 bytes. + * - 16-way set-associative cache structure. 
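+	 * (The configured L2 size is not reliably detectable at run time, so the code
+	 * below falls back to per-chipset defaults keyed on chipset->series and
+	 * chipset->model.)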
+ * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Broadcom BCM2837 | 4 | 16K | 16K | 512K | [1] | + * | Exynos 7420 | 4(+4) | 32K | 32K | 256K | [2, 3] | + * | Exynos 8890 | 4(+4) | 32K | 32K | 256K | [4] | + * | Rochchip RK3368 | 4+4 | 32K | 32K | 512K+256K | sysfs | + * | MediaTek MT8173C | 2(+2) | 32K | 32K | 512K(+1M) | sysfs | + * | Snapdragon 410 | 4 | 32K | 32K | 512K | [3] | + * | Snapdragon 630 | 4+4 | 32K | 32K | 1M+512K | sysfs | + * | Snapdragon 636 | 4(+4) | 32K+64K | 32K+64K | 1M+1M | sysfs | + * | Snapdragon 660 | 4(+4) | 32K+64K | 32K+64K | 1M+1M | sysfs | + * | Snapdragon 835 | 4(+4) | 32K+64K | 32K+64K | 1M(+2M) | sysfs | + * | Kirin 620 | 4+4 | 32K | 32K | 512K | [5] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766 + * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2 + * [3] https://www.usenix.org/system/files/conference/usenixsecurity16/sec16_paper_lipp.pdf + * [4] http://www.boardset.com/products/products_v8890.php + * [5] http://mirror.lemaker.org/Hi6220V100_Multi-Mode_Application_Processor_Function_Description.pdf + */ + if (midr_is_qualcomm_cortex_a53_silver(midr)) { + /* Qualcomm-modified Cortex-A53 in Snapdragon 630/660/835 */ + + uint32_t l2_size = 512 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_qualcomm_msm: + if (chipset->model == 8998) { + /* Snapdragon 835 (MSM8998): 1 MB L2 (little cores only) */ + l2_size = 1024 * 1024; + } + break; + case cpuinfo_arm_chipset_series_qualcomm_snapdragon: + switch (chipset->model) { + case 630: + if (cluster_id == 0) { + /* Snapdragon 630: 1 MB L2 for the big cores */ + l2_size = 1024 * 1024; + } + break; + case 636: + /* Snapdragon 636: 1 MB L2 (little cores only) */ + l2_size = 1024 * 1024; + break; + case 660: + /* Snapdragon 660: 1 MB L2 (little cores only) */ + l2_size = 1024 * 1024; + break; + } + break; + default: + break; + } + + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 16, + .line_size = 64 + }; + } else { + /* Standard Cortex-A53 */ + + /* Use conservative values by default */ + uint32_t l1_size = 16 * 1024; + uint32_t l2_size = 256 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_qualcomm_msm: + l1_size = 32 * 1024; + l2_size = 512 * 1024; + switch (chipset->model) { + case 8937: /* Snapdragon 430 */ + case 8940: /* Snapdragon 435 */ + case 8953: /* Snapdragon 625 or 626 (8953PRO) */ + if (cluster_id == 0) { + /* 1M L2 for big cluster */ + l2_size = 1024 * 1024; + } + break; + case 8952: /* Snapdragon 617 */ + if (cluster_id != 0) { + /* 256K L2 for LITTLE cluster */ + l2_size = 256 * 1024; + } + break; + default: + /* Silence compiler warning about unhandled enum values */ + break; + } + break; + case cpuinfo_arm_chipset_series_qualcomm_apq: + l1_size = 32 * 1024; + l2_size = 512 * 1024; + break; + case cpuinfo_arm_chipset_series_qualcomm_snapdragon: + l1_size = 32 * 1024; + l2_size = 512 * 1024; + if (chipset->model == 450 && cluster_id == 0) { + /* Snapdragon 450: 1M L2 for big cluster */ + l2_size = 1024 * 1024; + } 
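+						/*
+						 * Example: for a Snapdragon 450 big cluster the selection above yields
+						 * l2_size = 1 MB; with the 16-way, 64-byte-line geometry set below, the
+						 * common sets computation at the end of this function gives
+						 * 1048576 / (16 * 64) = 1024 sets.
+						 */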
+ break; + case cpuinfo_arm_chipset_series_hisilicon_hi: + l1_size = 32 * 1024; + l2_size = 512 * 1024; + break; + case cpuinfo_arm_chipset_series_hisilicon_kirin: + l1_size = 32 * 1024; + switch (chipset->model) { + case 970: /* Kirin 970 */ + l2_size = 1024 * 1024; + break; + default: + l2_size = 512 * 1024; + break; + } + break; + case cpuinfo_arm_chipset_series_mediatek_mt: + switch (chipset->model) { + case 8173: + l1_size = 32 * 1024; + l2_size = 512 * 1024; + break; + } + break; + case cpuinfo_arm_chipset_series_rockchip_rk: + l1_size = 32 * 1024; + switch (chipset->model) { + case 3368: + if (cluster_id == 0) { + /* RK3368: 512 KB L2 for the big cores */ + l2_size = 512 * 1024; + } + break; + } + break; + case cpuinfo_arm_chipset_series_broadcom_bcm: + switch (chipset->model) { + case 2837: /* BCM2837 */ + l2_size = 512 * 1024; + break; + } + break; + case cpuinfo_arm_chipset_series_samsung_exynos: + l1_size = 32 * 1024; + break; + default: + /* Silence compiler warning about unhandled enum values */ + break; + } + *l1i = (struct cpuinfo_cache) { + .size = l1_size, + .associativity = 2, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = l1_size, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 16, + .line_size = 64 + }; + } + break; + case cpuinfo_uarch_cortex_a55r0: + case cpuinfo_uarch_cortex_a55: + /* + * ARM Cortex-A55 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The Cortex®-A55 core's L1 memory system enhances core performance and power efficiency. + * It consists of separate instruction and data caches. You can configure instruction and data caches + * independently during implementation to sizes of 16KB, 32KB, or 64KB. + * + * L1 instruction-side memory system + * The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are: + * - 64-byte instruction side cache line length. + * - 4-way set associative L1 instruction cache. + * + * L1 data-side memory system + * - 64-byte data side cache line length. + * - 4-way set associative L1 data cache. + * + * A7.1 About the L2 memory system + * The Cortex-A55 L2 memory system is required to interface the Cortex-A55 cores to the L3 memory system. + * The L2 memory subsystem consists of: + * - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB or 256KB. Cache + * lines have a fixed length of 64 bytes. + * + * The main features of the L2 memory system are: + * - Strictly exclusive with L1 data cache. + * - Pseudo-inclusive with L1 instruction cache. + * - Private per-core unified L2 cache. + * + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Snapdragon 845 | 4(+4) | 32K | 32K | 128K | 2M | [1], sysfs | + * | Exynos 9810 | 4(+4) | ? | ? 
| None | 512K | [2] | + * | Kirin 980 | 4(+4) | 32K | 32K | 128K | 4M | [3] | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * + * [1] https://www.anandtech.com/show/12114/qualcomm-announces-snapdragon-845-soc + * [2] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results + * [3] https://en.wikichip.org/wiki/hisilicon/kirin/980 + */ + if (midr_is_qualcomm_cortex_a55_silver(midr)) { + /* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */ + uint32_t l3_size = 1024 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_qualcomm_snapdragon: + /* Snapdragon 845: 2M L3 cache */ + if (chipset->model == 845) { + l3_size = 2 * 1024 * 1024; + } + break; + default: + break; + } + + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = 128 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + .associativity = 16, + .line_size = 64, + }; + } else { + /* Standard Cortex-A55 */ + + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64, + }; + if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos) { + *l2 = (struct cpuinfo_cache) { + .size = 512 * 1024, + /* DynamIQ */ + .associativity = 16, + .line_size = 64, + }; + } else { + uint32_t l3_size = 1024 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_hisilicon_kirin: + /* Kirin 980: 4M L3 cache */ + if (chipset->model == 980) { + l3_size = 4 * 1024 * 1024; + } + break; + default: + break; + } + *l2 = (struct cpuinfo_cache) { + .size = 128 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + /* DynamIQ */ + .associativity = 16, + .line_size = 64, + }; + } + } + break; + case cpuinfo_uarch_cortex_a57: + /* + * ARM Cortex-A57 MPCore Processor Technical Reference Manual: + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. + * + * The L1 instruction memory system has the following features: + * - 48KB 3-way set-associative instruction cache. + * - Fixed line length of 64 bytes. + * + * The L1 data memory system has the following features: + * - 32KB 2-way set-associative data cache. + * - Fixed line length of 64 bytes. + * + * 7.1 About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, and 2MB. + * - Fixed line length of 64 bytes. + * - 16-way set-associative cache structure. + * - Inclusion property with L1 data caches. 
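+	 * (The inclusion property is reflected by the CPUINFO_CACHE_INCLUSIVE flag on the
+	 * L2 entry below; the L2 size is modeled as cluster_cores * 512 KB, e.g. a 4-core
+	 * cluster gives 4 * 512 KB = 2 MB, consistent with the table.)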
+ * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Snapdragon 810 | 4(+4) | 32K | 48K | 2M | [1] | + * | Exynos 7420 | 4(+4) | 32K | 48K | 2M | [2] | + * | Jetson TX1 | 4 | 32K | 48K | 2M | [3] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview + * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2 + * [3] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/ + */ + *l1i = (struct cpuinfo_cache) { + .size = 48 * 1024, + .associativity = 3, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 512 * 1024, + .associativity = 16, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE + }; + break; + case cpuinfo_uarch_cortex_a65: + { + /* + * ARM Cortex‑A65 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system enhances the performance and power efficiency in the Cortex‑A65 core. + * It consists of separate instruction and data caches. You can configure instruction and data caches + * independently during implementation to sizes of 32KB or 64KB. + * + * L1 instruction-side memory system + * The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are: + * - 64-byte instruction side cache line length. + * - 4-way set associative L1 instruction cache. + * + * L1 data-side memory system + * - 64-byte data side cache line length. + * - 4-way set associative L1 data cache. + * + * A7.1 About the L2 memory system + * The Cortex‑A65 L2 memory system is required to interface the Cortex‑A65 cores to the L3 memory system. + * The L2 memory subsystem consists of: + * - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB. + * Cache lines have a fixed length of 64 bytes. + * + * The main features of the L2 memory system are: + * - Strictly exclusive with L1 data cache. + * - Pseudo-inclusive with L1 instruction cache. + * - Private per-core unified L2 cache. + */ + const uint32_t l1_size = 32 * 1024; + const uint32_t l2_size = 128 * 1024; + const uint32_t l3_size = 512 * 1024; + *l1i = (struct cpuinfo_cache) { + .size = l1_size, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = l1_size, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 4, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + /* DynamIQ */ + .associativity = 16, + .line_size = 64, + }; + break; + } + case cpuinfo_uarch_cortex_a72: + { + /* + * ARM Cortex-A72 MPCore Processor Technical Reference Manual + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. + * + * The L1 instruction memory system has the following features: + * - 48KB 3-way set-associative instruction cache. + * - Fixed line length of 64 bytes. + * + * The L1 data memory system has the following features: + * - 32KB 2-way set-associative data cache. + * - Fixed cache line length of 64 bytes. 
+ * + * 7.1 About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + * - Fixed line length of 64 bytes. + * - Banked pipeline structures. + * - Inclusion property with L1 data caches. + * - 16-way set-associative cache structure. + * + * +---------------------+---------+-----------+-----------+------------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +---------------------+---------+-----------+-----------+------------+-----------+ + * | Snapdragon 650 | 2(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [1] | + * | Snapdragon 652 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [2] | + * | Snapdragon 653 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [3] | + * | HiSilicon Kirin 950 | 4(+4) | 32K+32K | 48K+32K | ? | | + * | HiSilicon Kirin 955 | 4(+4) | 32K+32K | 48K+32K | ? | | + * | MediaTek MT8173C | 2(+2) | 32K(+32K) | 48K(+32K) | 1M(+512K) | sysfs | + * | MediaTek Helio X20 | 2(+4+4) | ? | ? | ? | | + * | MediaTek Helio X23 | 2(+4+4) | ? | ? | ? | | + * | MediaTek Helio X25 | 2(+4+4) | ? | ? | ? | | + * | MediaTek Helio X27 | 2(+4+4) | ? | ? | ? | | + * | Broadcom BCM2711 | 4 | 32K | 48K | 1M | [4] | + * +---------------------+---------+-----------+-----------+------------+-----------+ + * + * [1] http://pdadb.net/index.php?m=processor&id=578&c=qualcomm_snapdragon_618_msm8956__snapdragon_650 + * [2] http://pdadb.net/index.php?m=processor&id=667&c=qualcomm_snapdragon_620_apq8076__snapdragon_652 + * [3] http://pdadb.net/index.php?m=processor&id=692&c=qualcomm_snapdragon_653_msm8976sg__msm8976_pro + * [4] https://www.raspberrypi.org/documentation/hardware/raspberrypi/bcm2711/README.md + */ + uint32_t l2_size; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_hisilicon_kirin: + l2_size = 2 * 1024 * 1024; + break; + default: + l2_size = 1024 * 1024; + break; + } + + *l1i = (struct cpuinfo_cache) { + .size = 48 * 1024, + .associativity = 3, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 2, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 16, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE + }; + break; + } + case cpuinfo_uarch_cortex_a73: + { + /* + * ARM Cortex‑A73 MPCore Processor Technical Reference Manual + * 6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. + * The size of the instruction cache is 64KB. + * The size of the data cache is configurable to either 32KB or 64KB. + * + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache. + * - Fixed cache line length of 64 bytes. + * + * The L1 data memory system has the following features: + * - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations) + * and a 16-way set associative PIPT cache (for 64KB configurations). + * - Fixed cache line length of 64 bytes. + * + * 7.1 About the L2 memory system + * The L2 memory system consists of: + * - A tightly-integrated L2 cache with: + * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB. + * - A 16-way, set-associative structure. + * - A fixed line length of 64 bytes. + * + * The ARM Cortex A73 - Artemis Unveiled [1] + * "ARM still envisions that most vendors will choose to use configurations of 1 to + * 2MB in consumer products. 
The L2 cache is inclusive of the L1 cache. " + * + * +---------------------+---------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +---------------------+---------+-----------+-----------+-----------+-----------+ + * | HiSilicon Kirin 960 | 4(+4) | 64K+32K | 64K+32K | ? | [2] | + * | MediaTek Helio X30 | 2(+4+4) | ? | 64K+ ? | ? | | + * | Snapdragon 636 | 4(+4) | 64K(+32K) | 64K(+32K) | 1M(+1M) | sysfs | + * | Snapdragon 660 | 4(+4) | 64K+32K | 64K+32K | 1M(+1M) | [3] | + * | Snapdragon 835 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | sysfs | + * +---------------------+---------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2 + * [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3 + * [3] https://arstechnica.com/gadgets/2017/05/qualcomms-snapdragon-660-and-630-bring-more-high-end-features-to-midrange-chips/ + */ + uint32_t l1d_size = 32 * 1024; + uint32_t l2_size = 512 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_hisilicon_kirin: + l1d_size = 64 * 1024; + l2_size = 2 * 1024 * 1024; + break; + case cpuinfo_arm_chipset_series_mediatek_mt: + l1d_size = 64 * 1024; + l2_size = 1 * 1024 * 1024; /* TODO: verify assumption */ + break; + default: + switch (midr) { + case UINT32_C(0x51AF8001): /* Kryo 280 Gold */ + l1d_size = 64 * 1024; + l2_size = 2 * 1024 * 1024; + break; + case UINT32_C(0x51AF8002): /* Kryo 260 Gold */ + l1d_size = 64 * 1024; + l2_size = 1 * 1024 * 1024; + break; + } + } + + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = l1d_size, + .associativity = (l1d_size >> 12), + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 16, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE + }; + break; + } + case cpuinfo_uarch_cortex_a75: + { + /* + * ARM Cortex-A75 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * A6.1.1 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache. + * - Fixed cache line length of 64 bytes. + * + * A6.1.2 L1 data-side memory system + * The L1 data memory system has the following features: + * - Physically Indexed, Physically Tagged (PIPT), 16-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-random cache replacement policy. + * + * A7.1 About the L2 memory system + * The L2 memory subsystem consist of: + * - An 8-way set associative L2 cache with a configurable size of 256KB or 512KB. + * Cache lines have a fixed length of 64 bytes. 
+ * + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Snapdragon 845 | 4(+4) | 64K | 64K | 256K | 2M | [1], sysfs | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * + * [1] https://www.anandtech.com/show/12114/qualcomm-announces-snapdragon-845-soc + */ + uint32_t l3_size = 1024 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_qualcomm_snapdragon: + /* Snapdragon 845: 2M L3 cache */ + if (chipset->model == 845) { + l3_size = 2 * 1024 * 1024; + } + break; + default: + break; + } + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 16, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .line_size = 64 + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + .associativity = 16, + .line_size = 64 + }; + break; + } + case cpuinfo_uarch_cortex_a76: + { + /* + * ARM Cortex-A76 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * A6.1.1 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * + * A6.1.2 L1 data-side memory system + * The L1 data memory system has the following features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-LRU cache replacement policy. + * + * A7.1 About the L2 memory system + * The L2 memory subsystem consist of: + * - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. + * Cache lines have a fixed length of 64 bytes. + * - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache. + * - Dynamic biased replacement policy. + * - Modified Exclusive Shared Invalid (MESI) coherency. 
+ * + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Kirin 980 | 4(+4) | 64K | 64K | 512K | 4M | [1], [2] | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * + * [1] https://www.anandtech.com/show/13298/hisilicon-announces-the-kirin-980-first-a76-g76-on-7nm + * [2] https://en.wikichip.org/wiki/hisilicon/kirin/980 + */ + uint32_t l2_size = 256 * 1024; + uint32_t l3_size = 1024 * 1024; + switch (chipset->series) { + case cpuinfo_arm_chipset_series_hisilicon_kirin: + /* Kirin 980: 512K L2 cache + 4M L3 cache */ + if (chipset->model == 980) { + l2_size = 512 * 1024; + l3_size = 4 * 1024 * 1024; + } + break; + default: + break; + } + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 8, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + .associativity = 16, + .line_size = 64, + }; + break; + } + case cpuinfo_uarch_cortex_a77: + { + /* + * ARM Cortex-A77 Core Technical Reference Manual + * A6.1. About the L1 memory system + * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB. + * + * A6.1.1 L1 instruction-side memory system + * The L1 instruction memory system has the following key features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * + * A6.1.2 L1 data-side memory system + * The L1 data memory system has the following features: + * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed, + * Physically Tagged (PIPT) 4-way set-associative L1 data cache. + * - Fixed cache line length of 64 bytes. + * - Pseudo-LRU cache replacement policy. + * + * A7.1 About the L2 memory system + * The L2 memory subsystem consist of: + * - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines + * have a fixed length of 64 bytes. + * - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache. + */ + const uint32_t l2_size = 256 * 1024; + const uint32_t l3_size = 1024 * 1024; + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64, + }; + *l2 = (struct cpuinfo_cache) { + .size = l2_size, + .associativity = 8, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + *l3 = (struct cpuinfo_cache) { + .size = l3_size, + .associativity = 16, + .line_size = 64, + }; + break; + } +#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) + case cpuinfo_uarch_scorpion: + /* + * - "The CPU includes 32KB instruction and data caches as + * well as a complete memory-management unit (MMU) suitable + * for high-level operating systems. The CPU also has + * 256KB of SRAM that can be allocated in 64KB increments + * to level-two (L2) cache or tightly coupled memory (TCM)." 
[1] + * We interpret it as L2 cache being 4-way set-associative on single-core Scorpion. + * - L1 Data Cache = 32 KB. 32 B/line. [2] + * - L2 Cache = 256 KB. 128 B/line. [2] + * - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3] + * - Single or dual-core configuration [3] + * - For L1 cache assume the same associativity as Krait + * + * [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf + * [2] http://www.7-cpu.com/cpu/Snapdragon.html + * [3] https://en.wikipedia.org/wiki/Scorpion_(CPU) + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 256 * 1024, + .associativity = 4, + .line_size = 128 + }; + break; + case cpuinfo_uarch_krait: + /* + * - L0 Data cache = 4 KB. 64 B/line, direct mapped [1] + * - L0 Instruction cache = 4 KB. [1] + * - L1 Data cache = 16 KB. 64 B/line, 4-way [1] + * - L1 Instruction cache = 16 KB, 4-way [1] + * - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1] + * - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2] + * + * [1] http://www.7-cpu.com/cpu/Krait.html + * [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2 + */ + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 /* assume same as L1D */ + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 512 * 1024, + .associativity = 8, + .line_size = 128 + }; + break; +#endif /* CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__) */ + case cpuinfo_uarch_kryo: + /* + * +-----------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +-----------------+-------+-----------+-----------+-----------+-----------+ + * | Snapdragon 820 | 2+2 | 24K | 32K | 1M+512K | [1, 2] | + * | Snapdragon 821 | 2+2 | ? | ? | 1M+512K | [1] | + * +-----------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2 + * [2] https://www.inforcecomputing.com/public_docs/Inforce6601/Inforce_6601_Micro-SOM_FAQs_04-2016-1.pdf + */ + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 24 * 1024, + .associativity = 3, + .line_size = 64 + }; + if (midr_is_kryo_silver(midr)) { + /* Kryo "Silver" */ + *l2 = (struct cpuinfo_cache) { + .size = 512 * 1024, + .associativity = 8, + .line_size = 128 + }; + } else { + /* Kryo "Gold" */ + *l2 = (struct cpuinfo_cache) { + .size = 1024 * 1024, + .associativity = 8, + .line_size = 128 + }; + } + break; + case cpuinfo_uarch_denver: + case cpuinfo_uarch_denver2: + /* + * The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 2 data cache, + * and a 2MB, 16-way level 2 cache, all of which can service both cores. [1] + * + * All the caches have 64-byte lines. 
[2] + * + * [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html + * [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014 + */ + *l1i = (struct cpuinfo_cache) { + .size = 128 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 2 * 1024 * 1024, + .associativity = 16, + .line_size = 64 + }; + break; + case cpuinfo_uarch_exynos_m1: + case cpuinfo_uarch_exynos_m2: + /* + * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$, + * namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1] + * - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1] + * - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split + * into 4 banks and has a 22 cycle latency" [1] + * + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * | Exynos 8 Octa 8890 | 4(+4) | 64K | 32K | 2M | [1] | + * | Exynos 8 Octa 8895 | 4(+4) | 64K | 32K | 2M | [2] | + * +--------------------+-------+-----------+-----------+-----------+-----------+ + * + * [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed + * [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market + */ + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4, + .line_size = 128 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 8, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = 2 * 1024 * 1024, + .associativity = 16, + .line_size = 64 + }; + break; + case cpuinfo_uarch_exynos_m3: + /* + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * | Exynos 9810 | 4(+4) | 64K | ? 
| 512K | 4M | [1] | + * +--------------------+-------+-----------+-----------+-----------+----------+------------+ + * + * [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results + */ + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */, + .associativity = 4 /* assume same as in Exynos M1/M2 cores */, + .line_size = 128 /* assume same as in Exynos M1/M2 cores */ + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 8 /* assume same as in Exynos M1/M2 cores */, + .line_size = 64 /* assume same as in Exynos M1/M2 cores */, + }; + *l2 = (struct cpuinfo_cache) { + .size = 512 * 1024, + .associativity = 16 /* assume same as in Exynos M1/M2 cores */, + .line_size = 64 /* assume same as in Exynos M1/M2 cores */, + }; + *l3 = (struct cpuinfo_cache) { + .size = 4 * 1024 * 1024, + .associativity = 16 /* assume DynamIQ cache */, + .line_size = 64 /* assume DynamIQ cache */, + }; + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case cpuinfo_uarch_thunderx: + /* + * "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1] + * + * [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf + */ + *l1i = (struct cpuinfo_cache) { + .size = 78 * 1024, + .associativity = 4 /* assumption */, + .line_size = 64 /* assumption */ + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4 /* assumption */, + .line_size = 64 /* assumption */ + }; + *l2 = (struct cpuinfo_cache) { + .size = 16 * 1024 * 1024, + .associativity = 8 /* assumption */, + .line_size = 64 /* assumption */ + }; + break; + case cpuinfo_uarch_taishan_v110: + /* + * It features private 64 KiB L1 instruction and data caches as well as 512 KiB of private L2. [1] + * + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-3226 | 32 | 64K | 64K | 512K | 32M | [2] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-4826 | 48 | 64K | 64K | 512K | 48M | [3] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * | Kunpeng 920-6426 | 64 | 64K | 64K | 512K | 64M | [4] | + * +------------------+-------+-----------+-----------+-----------+----------+-----------+ + * + * [1] https://en.wikichip.org/wiki/hisilicon/microarchitectures/taishan_v110 + * [2] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-3226 + * [3] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-4826 + * [4] https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426 + */ + *l1i = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4 /* assumption */, + .line_size = 128 /* assumption */, + }; + *l1d = (struct cpuinfo_cache) { + .size = 64 * 1024, + .associativity = 4 /* assumption */, + .line_size = 128 /* assumption */, + }; + *l2 = (struct cpuinfo_cache) { + .size = 512 * 1024, + .associativity = 8 /* assumption */, + .line_size = 128 /* assumption */, + .flags = CPUINFO_CACHE_INCLUSIVE /* assumption */, + }; + *l3 = (struct cpuinfo_cache) { + .size = cluster_cores * 1024 * 1024, + .associativity = 16 /* assumption */, + .line_size = 128 /* assumption */, + }; + break; +#endif + case cpuinfo_uarch_cortex_a12: + case cpuinfo_uarch_cortex_a32: + default: + cpuinfo_log_warning("target uarch not recognized; using 
generic cache parameters"); + /* Follow OpenBLAS */ + if (arch_version >= 8) { + *l1i = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l1d = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .line_size = 64 + }; + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 256 * 1024, + .associativity = 8, + .line_size = 64 + }; + } else { + *l1i = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + *l1d = (struct cpuinfo_cache) { + .size = 16 * 1024, + .associativity = 4, + .line_size = 32 + }; + if (arch_version >= 7) { + *l2 = (struct cpuinfo_cache) { + .size = cluster_cores * 128 * 1024, + .associativity = 8, + .line_size = 32 + }; + } + } + break; + } + l1i->sets = l1i->size / (l1i->associativity * l1i->line_size); + l1i->partitions = 1; + l1d->sets = l1d->size / (l1d->associativity * l1d->line_size); + l1d->partitions = 1; + if (l2->size != 0) { + l2->sets = l2->size / (l2->associativity * l2->line_size); + l2->partitions = 1; + if (l3->size != 0) { + l3->sets = l3->size / (l3->associativity * l3->line_size); + l3->partitions = 1; + } + } +} + +uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) { + /* + * There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo + * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum. + */ + switch (processor->core->uarch) { + case cpuinfo_uarch_xscale: + case cpuinfo_uarch_arm11: + case cpuinfo_uarch_scorpion: + case cpuinfo_uarch_krait: + case cpuinfo_uarch_kryo: + case cpuinfo_uarch_exynos_m1: + case cpuinfo_uarch_exynos_m2: + case cpuinfo_uarch_exynos_m3: + /* cpuinfo-detected cache size always correct */ + return cpuinfo_compute_max_cache_size(processor); + case cpuinfo_uarch_cortex_a5: + /* Max observed (NXP Vybrid SoC) */ + return 512 * 1024; + case cpuinfo_uarch_cortex_a7: + /* + * Cortex-A7 MPCore Technical Reference Manual: + * 7.1. About the L2 Memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB. + */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a8: + /* + * Cortex-A8 Technical Reference Manual: + * 8.1. About the L2 memory system + * The key features of the L2 memory system include: + * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB + */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a9: + /* Max observed (e.g. Exynos 4212) */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a12: + case cpuinfo_uarch_cortex_a17: + /* + * ARM Cortex-A17 MPCore Processor Technical Reference Manual: + * 7.1. About the L2 Memory system + * The key features of the L2 memory system include: + * - An integrated L2 cache: + * - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB. + */ + return 8 * 1024 * 1024; + case cpuinfo_uarch_cortex_a15: + /* + * ARM Cortex-A15 MPCore Processor Technical Reference Manual: + * 7.1. About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + */ + return 4 * 1024 * 1024; + case cpuinfo_uarch_cortex_a35: + /* + * ARM Cortex‑A35 Processor Technical Reference Manual: + * 7.1 About the L2 memory system + * L2 cache + * - Further features of the L2 cache are: + * - Configurable size of 128KB, 256KB, 512KB, and 1MB. 
+ */ + return 1024 * 1024; + case cpuinfo_uarch_cortex_a53: + /* + * ARM Cortex-A53 MPCore Processor Technical Reference Manual: + * 7.1. About the L2 memory system + * The L2 memory system consists of an: + * - Optional tightly-coupled L2 cache that includes: + * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB. + */ + return 2 * 1024 * 1024; + case cpuinfo_uarch_cortex_a57: + /* + * ARM Cortex-A57 MPCore Processor Technical Reference Manual: + * 7.1 About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, and 2MB. + */ + return 2 * 1024 * 1024; + case cpuinfo_uarch_cortex_a72: + /* + * ARM Cortex-A72 MPCore Processor Technical Reference Manual: + * 7.1 About the L2 memory system + * The features of the L2 memory system include: + * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. + */ + return 4 * 1024 * 1024; + case cpuinfo_uarch_cortex_a73: + /* + * ARM Cortex‑A73 MPCore Processor Technical Reference Manual + * 7.1 About the L2 memory system + * The L2 memory system consists of: + * - A tightly-integrated L2 cache with: + * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB. + */ + return 8 * 1024 * 1024; + case cpuinfo_uarch_cortex_a55: + case cpuinfo_uarch_cortex_a75: + case cpuinfo_uarch_cortex_a76: + case cpuinfo_uarch_exynos_m4: + default: + /* + * ARM DynamIQ Shared Unit Technical Reference Manual + * 1.3 Implementation options + * L3_CACHE_SIZE + * - 256KB + * - 512KB + * - 1024KB + * - 1536KB + * - 2048KB + * - 3072KB + * - 4096KB + */ + return 4 * 1024 * 1024; + } +} diff --git a/dep/cpuinfo/src/arm/linux/aarch32-isa.c b/dep/cpuinfo/src/arm/linux/aarch32-isa.c new file mode 100644 index 000000000..df68aa147 --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/aarch32-isa.c @@ -0,0 +1,267 @@ +#include + +#if CPUINFO_MOCK + #include +#endif +#include +#include +#include +#include + + +#if CPUINFO_MOCK + uint32_t cpuinfo_arm_fpsid = 0; + uint32_t cpuinfo_arm_mvfr0 = 0; + uint32_t cpuinfo_arm_wcid = 0; + + void cpuinfo_set_fpsid(uint32_t fpsid) { + cpuinfo_arm_fpsid = fpsid; + } + + void cpuinfo_set_wcid(uint32_t wcid) { + cpuinfo_arm_wcid = wcid; + } +#endif + + +void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + uint32_t features, + uint32_t features2, + uint32_t midr, + uint32_t architecture_version, + uint32_t architecture_flags, + const struct cpuinfo_arm_chipset chipset[restrict static 1], + struct cpuinfo_arm_isa isa[restrict static 1]) +{ + if (architecture_version >= 8) { + /* + * ARMv7 code running on ARMv8: IDIV, VFP, NEON are always supported, + * but may be not reported in /proc/cpuinfo features. + */ + isa->armv5e = true; + isa->armv6 = true; + isa->armv6k = true; + isa->armv7 = true; + isa->armv7mp = true; + isa->armv8 = true; + isa->thumb = true; + isa->thumb2 = true; + isa->idiv = true; + isa->vfpv3 = true; + isa->d32 = true; + isa->fp16 = true; + isa->fma = true; + isa->neon = true; + + /* + * NEON FP16 compute extension and VQRDMLAH/VQRDMLSH instructions are not indicated in /proc/cpuinfo. 
+ * Use a MIDR-based heuristic to whitelist processors known to support it: + * - Processors with Cortex-A55 cores + * - Processors with Cortex-A65 cores + * - Processors with Cortex-A75 cores + * - Processors with Cortex-A76 cores + * - Processors with Cortex-A77 cores + * - Processors with Exynos M4 cores + * - Processors with Exynos M5 cores + * - Neoverse N1 cores + */ + if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { + /* Only little cores of Exynos 9810 support FP16 & RDM */ + cpuinfo_log_warning("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions"); + } else { + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D050): /* Cortex-A55 */ + case UINT32_C(0x4100D060): /* Cortex-A65 */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0C0): /* Neoverse N1 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ + case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ + case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos M4 */ + case UINT32_C(0x53000040): /* Exynos M5 */ + isa->fp16arith = true; + isa->rdm = true; + break; + } + } + + /* + * NEON VDOT instructions are not indicated in /proc/cpuinfo. + * Use a MIDR-based heuristic to whitelist processors known to support it. + */ + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos-M4 */ + case UINT32_C(0x53000040): /* Exynos-M5 */ + isa->dot = true; + break; + case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 1); + break; + case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 2); + break; + } + } else { + /* ARMv7 or lower: use feature flags to detect optional features */ + + /* + * ARM11 (ARM 1136/1156/1176/11 MPCore) processors can report v7 architecture + * even though they support only ARMv6 instruction set. 
+ */ + if (architecture_version == 7 && midr_is_arm11(midr)) { + cpuinfo_log_warning("kernel-reported architecture ARMv7 ignored due to mismatch with processor microarchitecture (ARM11)"); + architecture_version = 6; + } + + if (architecture_version < 7) { + const uint32_t armv7_features_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | CPUINFO_ARM_LINUX_FEATURE_VFPD32 | + CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON | CPUINFO_ARM_LINUX_FEATURE_IDIVT | CPUINFO_ARM_LINUX_FEATURE_IDIVA; + if (features & armv7_features_mask) { + architecture_version = 7; + } + } + if ((architecture_version >= 6) || (features & CPUINFO_ARM_LINUX_FEATURE_EDSP) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_E)) { + isa->armv5e = true; + } + if (architecture_version >= 6) { + isa->armv6 = true; + } + if (architecture_version >= 7) { + isa->armv6k = true; + isa->armv7 = true; + + /* + * ARMv7 MP extension (PLDW instruction) is not indicated in /proc/cpuinfo. + * Use heuristic list of supporting processors: + * - Processors supporting UDIV/SDIV instructions ("idiva" + "idivt" features in /proc/cpuinfo) + * - Cortex-A5 + * - Cortex-A9 + * - Dual-Core Scorpion + * - Krait (supports UDIV/SDIV, but kernels may not report it in /proc/cpuinfo) + * + * TODO: check single-core Qualcomm Scorpion. + */ + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100C050): /* Cortex-A5 */ + case UINT32_C(0x4100C090): /* Cortex-A9 */ + case UINT32_C(0x510002D0): /* Scorpion (dual-core) */ + case UINT32_C(0x510004D0): /* Krait (dual-core) */ + case UINT32_C(0x510006F0): /* Krait (quad-core) */ + isa->armv7mp = true; + break; + default: + /* In practice IDIV instruction implies ARMv7+MP ISA */ + isa->armv7mp = (features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV; + break; + } + } + + if (features & CPUINFO_ARM_LINUX_FEATURE_IWMMXT) { + const uint32_t wcid = read_wcid(); + cpuinfo_log_debug("WCID = 0x%08"PRIx32, wcid); + const uint32_t coprocessor_type = (wcid >> 8) & UINT32_C(0xFF); + if (coprocessor_type >= 0x10) { + isa->wmmx = true; + if (coprocessor_type >= 0x20) { + isa->wmmx2 = true; + } + } else { + cpuinfo_log_warning("WMMX ISA disabled: OS reported iwmmxt feature, " + "but WCID coprocessor type 0x%"PRIx32" indicates no WMMX support", + coprocessor_type); + } + } + + if ((features & CPUINFO_ARM_LINUX_FEATURE_THUMB) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_T)) { + isa->thumb = true; + + /* + * There is no separate feature flag for Thumb 2. + * All ARMv7 processors and ARM 1156 support Thumb 2. 
+ */ + if (architecture_version >= 7 || midr_is_arm1156(midr)) { + isa->thumb2 = true; + } + } + if (features & CPUINFO_ARM_LINUX_FEATURE_THUMBEE) { + isa->thumbee = true; + } + if ((features & CPUINFO_ARM_LINUX_FEATURE_JAVA) || (architecture_flags & CPUINFO_ARM_LINUX_ARCH_J)) { + isa->jazelle = true; + } + + /* Qualcomm Krait may have buggy kernel configuration that doesn't report IDIV */ + if ((features & CPUINFO_ARM_LINUX_FEATURE_IDIV) == CPUINFO_ARM_LINUX_FEATURE_IDIV || midr_is_krait(midr)) { + isa->idiv = true; + } + + const uint32_t vfp_mask = \ + CPUINFO_ARM_LINUX_FEATURE_VFP | CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \ + CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON; + if (features & vfp_mask) { + const uint32_t vfpv3_mask = CPUINFO_ARM_LINUX_FEATURE_VFPV3 | CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 | \ + CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_VFPV4 | CPUINFO_ARM_LINUX_FEATURE_NEON; + if ((architecture_version >= 7) || (features & vfpv3_mask)) { + isa->vfpv3 = true; + + const uint32_t d32_mask = CPUINFO_ARM_LINUX_FEATURE_VFPD32 | CPUINFO_ARM_LINUX_FEATURE_NEON; + if (features & d32_mask) { + isa->d32 = true; + } + } else { + #if defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH) && (__ARM_ARCH >= 7) + isa->vfpv3 = true; + #else + const uint32_t fpsid = read_fpsid(); + cpuinfo_log_debug("FPSID = 0x%08"PRIx32, fpsid); + const uint32_t subarchitecture = (fpsid >> 16) & UINT32_C(0x7F); + if (subarchitecture >= 0x01) { + isa->vfpv2 = true; + } + #endif + } + } + if (features & CPUINFO_ARM_LINUX_FEATURE_NEON) { + isa->neon = true; + } + + /* + * There is no separate feature flag for FP16 support. + * VFPv4 implies VFPv3-FP16 support (and in practice, NEON-HP as well). + * Additionally, ARM Cortex-A9 and Qualcomm Scorpion support FP16. 
+ */ + if ((features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) || midr_is_cortex_a9(midr) || midr_is_scorpion(midr)) { + isa->fp16 = true; + } + + if (features & CPUINFO_ARM_LINUX_FEATURE_VFPV4) { + isa->fma = true; + } + } + + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_AES) { + isa->aes = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_PMULL) { + isa->pmull = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA1) { + isa->sha1 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SHA2) { + isa->sha2 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_CRC32) { + isa->crc32 = true; + } +} diff --git a/dep/cpuinfo/src/arm/linux/aarch64-isa.c b/dep/cpuinfo/src/arm/linux/aarch64-isa.c new file mode 100644 index 000000000..2000e1a15 --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/aarch64-isa.c @@ -0,0 +1,127 @@ +#include + +#include +#include + + +void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( + uint32_t features, + uint32_t features2, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[restrict static 1], + struct cpuinfo_arm_isa isa[restrict static 1]) +{ + if (features & CPUINFO_ARM_LINUX_FEATURE_AES) { + isa->aes = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_PMULL) { + isa->pmull = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_SHA1) { + isa->sha1 = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_SHA2) { + isa->sha2 = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_CRC32) { + isa->crc32 = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_ATOMICS) { + isa->atomics = true; + } + + /* + * Some phones ship with an old kernel configuration that doesn't report NEON FP16 compute extension and SQRDMLAH/SQRDMLSH/UQRDMLAH/UQRDMLSH instructions. + * Use a MIDR-based heuristic to whitelist processors known to support it: + * - Processors with Cortex-A55 cores + * - Processors with Cortex-A65 cores + * - Processors with Cortex-A75 cores + * - Processors with Cortex-A76 cores + * - Processors with Cortex-A77 cores + * - Processors with Exynos M4 cores + * - Processors with Exynos M5 cores + * - Neoverse N1 cores + */ + if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) { + /* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */ + cpuinfo_log_warning("FP16 arithmetics and RDM disabled: only little cores in Exynos 9810 support these extensions"); + } else { + const uint32_t fp16arith_mask = CPUINFO_ARM_LINUX_FEATURE_FPHP | CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D050): /* Cortex-A55 */ + case UINT32_C(0x4100D060): /* Cortex-A65 */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0C0): /* Neoverse N1 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */ + case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */ + case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos M4 */ + case UINT32_C(0x53000040): /* Exynos M5 */ + isa->fp16arith = true; + isa->rdm = true; + break; + default: + if ((features & fp16arith_mask) == fp16arith_mask) { + isa->fp16arith = true; + } else if (features & CPUINFO_ARM_LINUX_FEATURE_FPHP) { + cpuinfo_log_warning("FP16 arithmetics disabled: detected support only 
for scalar operations"); + } else if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDHP) { + cpuinfo_log_warning("FP16 arithmetics disabled: detected support only for SIMD operations"); + } + if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM) { + isa->rdm = true; + } + break; + } + } + + /* + * Many phones ship with an old kernel configuration that doesn't report UDOT/SDOT instructions. + * Use a MIDR-based heuristic to whitelist processors known to support it. + */ + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x4100D060): /* Cortex-A65 */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0C0): /* Neoverse N1 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D4A0): /* Neoverse E1 */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */ + case UINT32_C(0x51008050): /* Kryo 485 Silver (Cortex-A55) */ + case UINT32_C(0x53000030): /* Exynos-M4 */ + case UINT32_C(0x53000040): /* Exynos-M5 */ + isa->dot = true; + break; + case UINT32_C(0x4100D050): /* Cortex A55: revision 1 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 1); + break; + case UINT32_C(0x4100D0A0): /* Cortex A75: revision 2 or later only */ + isa->dot = !!(midr_get_variant(midr) >= 2); + break; + default: + if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDDP) { + isa->dot = true; + } + break; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) { + isa->jscvt = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_JSCVT) { + isa->jscvt = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_FCMA) { + isa->fcma = true; + } + if (features & CPUINFO_ARM_LINUX_FEATURE_SVE) { + isa->sve = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) { + isa->sve2 = true; + } +} diff --git a/dep/cpuinfo/src/arm/linux/api.h b/dep/cpuinfo/src/arm/linux/api.h new file mode 100644 index 000000000..1c09f827e --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/api.h @@ -0,0 +1,384 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +/* No hard limit in the kernel, maximum length observed on non-rogue kernels is 64 */ +#define CPUINFO_HARDWARE_VALUE_MAX 64 +/* No hard limit in the kernel, maximum length on Raspberry Pi is 8. 
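/*
 * Illustrative sketch, not part of the vendored sources: how the MIDR-based
 * whitelists above match a core. The mask values mirror the architectural MIDR
 * layout the code assumes (implementer in bits [31:24], variant in [23:20],
 * part number in [15:4]); the sample value is a hypothetical Cortex-A55 r1p2 MIDR.
 */
#include <assert.h>
#include <stdint.h>

#define MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define MIDR_VARIANT_MASK     UINT32_C(0x00F00000)
#define MIDR_PART_MASK        UINT32_C(0x0000FFF0)

int main(void) {
	const uint32_t midr = UINT32_C(0x411FD052); /* hypothetical: ARM (0x41), variant 1, part 0xD05, revision 2 */

	/* Implementer + part comparison, as in the switch statements above */
	assert((midr & (MIDR_IMPLEMENTER_MASK | MIDR_PART_MASK)) == UINT32_C(0x4100D050)); /* Cortex-A55 */

	/* Variant check, as in the "Cortex A55: revision 1 or later only" case */
	const uint32_t variant = (midr & MIDR_VARIANT_MASK) >> 20;
	assert(variant >= 1);
	return 0;
}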
Add 1 symbol to detect overly large revision strings */ +#define CPUINFO_REVISION_VALUE_MAX 9 + +#ifdef __ANDROID__ + /* As per include/sys/system_properties.h in Android NDK */ + #define CPUINFO_BUILD_PROP_NAME_MAX 32 + #define CPUINFO_BUILD_PROP_VALUE_MAX 92 + + struct cpuinfo_android_properties { + char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; + char ro_product_board[CPUINFO_BUILD_PROP_VALUE_MAX]; + char ro_board_platform[CPUINFO_BUILD_PROP_VALUE_MAX]; + char ro_mediatek_platform[CPUINFO_BUILD_PROP_VALUE_MAX]; + char ro_arch[CPUINFO_BUILD_PROP_VALUE_MAX]; + char ro_chipname[CPUINFO_BUILD_PROP_VALUE_MAX]; + char ro_hardware_chipname[CPUINFO_BUILD_PROP_VALUE_MAX]; + }; +#endif + +#define CPUINFO_ARM_LINUX_ARCH_T UINT32_C(0x00000001) +#define CPUINFO_ARM_LINUX_ARCH_E UINT32_C(0x00000002) +#define CPUINFO_ARM_LINUX_ARCH_J UINT32_C(0x00000004) + +#define CPUINFO_ARM_LINUX_ARCH_TE UINT32_C(0x00000003) +#define CPUINFO_ARM_LINUX_ARCH_TEJ UINT32_C(0x00000007) + +struct cpuinfo_arm_linux_proc_cpuinfo_cache { + uint32_t i_size; + uint32_t i_assoc; + uint32_t i_line_length; + uint32_t i_sets; + uint32_t d_size; + uint32_t d_assoc; + uint32_t d_line_length; + uint32_t d_sets; +}; + +#if CPUINFO_ARCH_ARM + /* arch/arm/include/uapi/asm/hwcap.h */ + + #define CPUINFO_ARM_LINUX_FEATURE_SWP UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE_HALF UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE_THUMB UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE_26BIT UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE_FASTMULT UINT32_C(0x00000010) + #define CPUINFO_ARM_LINUX_FEATURE_FPA UINT32_C(0x00000020) + #define CPUINFO_ARM_LINUX_FEATURE_VFP UINT32_C(0x00000040) + #define CPUINFO_ARM_LINUX_FEATURE_EDSP UINT32_C(0x00000080) + #define CPUINFO_ARM_LINUX_FEATURE_JAVA UINT32_C(0x00000100) + #define CPUINFO_ARM_LINUX_FEATURE_IWMMXT UINT32_C(0x00000200) + #define CPUINFO_ARM_LINUX_FEATURE_CRUNCH UINT32_C(0x00000400) + #define CPUINFO_ARM_LINUX_FEATURE_THUMBEE UINT32_C(0x00000800) + #define CPUINFO_ARM_LINUX_FEATURE_NEON UINT32_C(0x00001000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPV3 UINT32_C(0x00002000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPV3D16 UINT32_C(0x00004000) /* Also set for VFPv4 with 16 double-precision registers */ + #define CPUINFO_ARM_LINUX_FEATURE_TLS UINT32_C(0x00008000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPV4 UINT32_C(0x00010000) + #define CPUINFO_ARM_LINUX_FEATURE_IDIVA UINT32_C(0x00020000) + #define CPUINFO_ARM_LINUX_FEATURE_IDIVT UINT32_C(0x00040000) + #define CPUINFO_ARM_LINUX_FEATURE_IDIV UINT32_C(0x00060000) + #define CPUINFO_ARM_LINUX_FEATURE_VFPD32 UINT32_C(0x00080000) + #define CPUINFO_ARM_LINUX_FEATURE_LPAE UINT32_C(0x00100000) + #define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00200000) + + #define CPUINFO_ARM_LINUX_FEATURE2_AES UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE2_PMULL UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE2_SHA1 UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE2_SHA2 UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE2_CRC32 UINT32_C(0x00000010) +#elif CPUINFO_ARCH_ARM64 + /* arch/arm64/include/uapi/asm/hwcap.h */ + #define CPUINFO_ARM_LINUX_FEATURE_FP UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMD UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE_EVTSTRM UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE_AES UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE_PMULL UINT32_C(0x00000010) + #define CPUINFO_ARM_LINUX_FEATURE_SHA1 UINT32_C(0x00000020) + #define 
CPUINFO_ARM_LINUX_FEATURE_SHA2 UINT32_C(0x00000040) + #define CPUINFO_ARM_LINUX_FEATURE_CRC32 UINT32_C(0x00000080) + #define CPUINFO_ARM_LINUX_FEATURE_ATOMICS UINT32_C(0x00000100) + #define CPUINFO_ARM_LINUX_FEATURE_FPHP UINT32_C(0x00000200) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMDHP UINT32_C(0x00000400) + #define CPUINFO_ARM_LINUX_FEATURE_CPUID UINT32_C(0x00000800) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM UINT32_C(0x00001000) + #define CPUINFO_ARM_LINUX_FEATURE_JSCVT UINT32_C(0x00002000) + #define CPUINFO_ARM_LINUX_FEATURE_FCMA UINT32_C(0x00004000) + #define CPUINFO_ARM_LINUX_FEATURE_LRCPC UINT32_C(0x00008000) + #define CPUINFO_ARM_LINUX_FEATURE_DCPOP UINT32_C(0x00010000) + #define CPUINFO_ARM_LINUX_FEATURE_SHA3 UINT32_C(0x00020000) + #define CPUINFO_ARM_LINUX_FEATURE_SM3 UINT32_C(0x00040000) + #define CPUINFO_ARM_LINUX_FEATURE_SM4 UINT32_C(0x00080000) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMDDP UINT32_C(0x00100000) + #define CPUINFO_ARM_LINUX_FEATURE_SHA512 UINT32_C(0x00200000) + #define CPUINFO_ARM_LINUX_FEATURE_SVE UINT32_C(0x00400000) + #define CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM UINT32_C(0x00800000) + #define CPUINFO_ARM_LINUX_FEATURE_DIT UINT32_C(0x01000000) + #define CPUINFO_ARM_LINUX_FEATURE_USCAT UINT32_C(0x02000000) + #define CPUINFO_ARM_LINUX_FEATURE_ILRCPC UINT32_C(0x04000000) + #define CPUINFO_ARM_LINUX_FEATURE_FLAGM UINT32_C(0x08000000) + #define CPUINFO_ARM_LINUX_FEATURE_SSBS UINT32_C(0x10000000) + #define CPUINFO_ARM_LINUX_FEATURE_SB UINT32_C(0x20000000) + #define CPUINFO_ARM_LINUX_FEATURE_PACA UINT32_C(0x40000000) + #define CPUINFO_ARM_LINUX_FEATURE_PACG UINT32_C(0x80000000) + + #define CPUINFO_ARM_LINUX_FEATURE2_DCPODP UINT32_C(0x00000001) + #define CPUINFO_ARM_LINUX_FEATURE2_SVE2 UINT32_C(0x00000002) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEAES UINT32_C(0x00000004) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEPMULL UINT32_C(0x00000008) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEBITPERM UINT32_C(0x00000010) + #define CPUINFO_ARM_LINUX_FEATURE2_SVESHA3 UINT32_C(0x00000020) + #define CPUINFO_ARM_LINUX_FEATURE2_SVESM4 UINT32_C(0x00000040) + #define CPUINFO_ARM_LINUX_FEATURE2_FLAGM2 UINT32_C(0x00000080) + #define CPUINFO_ARM_LINUX_FEATURE2_FRINT UINT32_C(0x00000100) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEI8MM UINT32_C(0x00000200) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEF32MM UINT32_C(0x00000400) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEF64MM UINT32_C(0x00000800) + #define CPUINFO_ARM_LINUX_FEATURE2_SVEBF16 UINT32_C(0x00001000) + #define CPUINFO_ARM_LINUX_FEATURE2_I8MM UINT32_C(0x00002000) + #define CPUINFO_ARM_LINUX_FEATURE2_BF16 UINT32_C(0x00004000) + #define CPUINFO_ARM_LINUX_FEATURE2_DGH UINT32_C(0x00008000) + #define CPUINFO_ARM_LINUX_FEATURE2_RNG UINT32_C(0x00010000) + #define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000) +#endif + +#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000) +#define CPUINFO_ARM_LINUX_VALID_IMPLEMENTER UINT32_C(0x00020000) +#define CPUINFO_ARM_LINUX_VALID_VARIANT UINT32_C(0x00040000) +#define CPUINFO_ARM_LINUX_VALID_PART UINT32_C(0x00080000) +#define CPUINFO_ARM_LINUX_VALID_REVISION UINT32_C(0x00100000) +#define CPUINFO_ARM_LINUX_VALID_PROCESSOR UINT32_C(0x00200000) +#define CPUINFO_ARM_LINUX_VALID_FEATURES UINT32_C(0x00400000) +#if CPUINFO_ARCH_ARM + #define CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE UINT32_C(0x01000000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_SETS UINT32_C(0x02000000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS UINT32_C(0x04000000) + #define CPUINFO_ARM_LINUX_VALID_ICACHE_LINE UINT32_C(0x08000000) + #define 
CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE UINT32_C(0x10000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_SETS UINT32_C(0x20000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS UINT32_C(0x40000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE_LINE UINT32_C(0x80000000) +#endif + +#define CPUINFO_ARM_LINUX_VALID_INFO UINT32_C(0x007F0000) +#define CPUINFO_ARM_LINUX_VALID_MIDR UINT32_C(0x003F0000) +#if CPUINFO_ARCH_ARM + #define CPUINFO_ARM_LINUX_VALID_ICACHE UINT32_C(0x0F000000) + #define CPUINFO_ARM_LINUX_VALID_DCACHE UINT32_C(0xF0000000) + #define CPUINFO_ARM_LINUX_VALID_CACHE_LINE UINT32_C(0x88000000) +#endif + +struct cpuinfo_arm_linux_processor { + uint32_t architecture_version; +#if CPUINFO_ARCH_ARM + uint32_t architecture_flags; + struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache; +#endif + uint32_t features; + uint32_t features2; + /** + * Main ID Register value. + */ + uint32_t midr; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; + uint32_t uarch_index; + /** + * ID of the physical package which includes this logical processor. + * The value is parsed from /sys/devices/system/cpu/cpu/topology/physical_package_id + */ + uint32_t package_id; + /** + * Minimum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + uint32_t package_leader_id; + /** + * Number of logical processors in the package. + */ + uint32_t package_processor_count; + /** + * Maximum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu/cpufreq/cpuinfo_max_freq + * If failed to read or parse the file, the value is 0. + */ + uint32_t max_frequency; + /** + * Minimum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu/cpufreq/cpuinfo_min_freq + * If failed to read or parse the file, the value is 0. 
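/*
 * Illustrative sketch, not part of the vendored sources: the `features`/`features2`
 * words consumed by the decoders above are Linux hwcap bitmasks, which (as the
 * cpuinfo_arm_linux_hwcap_from_getauxval declaration below suggests) can be read
 * with getauxval(3). The bit value reuses CPUINFO_ARM_LINUX_FEATURE_ASIMDDP from
 * the AArch64 table above; Linux-only.
 */
#include <stdio.h>
#include <sys/auxv.h>

int main(void) {
	const unsigned long hwcap = getauxval(AT_HWCAP);
	const unsigned long hwcap2 = getauxval(AT_HWCAP2);
	printf("hwcap = 0x%lx, hwcap2 = 0x%lx\n", hwcap, hwcap2);
	if (hwcap & 0x00100000ul) { /* same bit as CPUINFO_ARM_LINUX_FEATURE_ASIMDDP */
		printf("kernel reports UDOT/SDOT (ASIMDDP)\n");
	}
	return 0;
}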
+ */ + uint32_t min_frequency; + /** Linux processor ID */ + uint32_t system_processor_id; + uint32_t flags; +}; + +struct cpuinfo_arm_linux_cluster { + uint32_t processor_id_min; + uint32_t processor_id_max; +}; + +/* Returns true if the two processors do belong to the same cluster */ +static inline bool cpuinfo_arm_linux_processor_equals( + struct cpuinfo_arm_linux_processor processor_i[restrict static 1], + struct cpuinfo_arm_linux_processor processor_j[restrict static 1]) +{ + const uint32_t joint_flags = processor_i->flags & processor_j->flags; + + bool same_max_frequency = false; + if (joint_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (processor_i->max_frequency != processor_j->max_frequency) { + return false; + } else { + same_max_frequency = true; + } + } + + bool same_min_frequency = false; + if (joint_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (processor_i->min_frequency != processor_j->min_frequency) { + return false; + } else { + same_min_frequency = true; + } + } + + if ((joint_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR) { + if (processor_i->midr == processor_j->midr) { + if (midr_is_cortex_a53(processor_i->midr)) { + return same_min_frequency & same_max_frequency; + } else { + return true; + } + } + } + + return same_max_frequency && same_min_frequency; +} + +/* Returns true if the two processors certainly don't belong to the same cluster */ +static inline bool cpuinfo_arm_linux_processor_not_equals( + struct cpuinfo_arm_linux_processor processor_i[restrict static 1], + struct cpuinfo_arm_linux_processor processor_j[restrict static 1]) +{ + const uint32_t joint_flags = processor_i->flags & processor_j->flags; + + if (joint_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (processor_i->max_frequency != processor_j->max_frequency) { + return true; + } + } + + if (joint_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (processor_i->min_frequency != processor_j->min_frequency) { + return true; + } + } + + if ((joint_flags & CPUINFO_ARM_LINUX_VALID_MIDR) == CPUINFO_ARM_LINUX_VALID_MIDR) { + if (processor_i->midr != processor_j->midr) { + return true; + } + } + + return false; +} + +CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo( + char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + char revision[restrict static CPUINFO_REVISION_VALUE_MAX], + uint32_t max_processors_count, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]); + +#if CPUINFO_ARCH_ARM + CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval( + uint32_t hwcap[restrict static 1], + uint32_t hwcap2[restrict static 1]); + CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs( + uint32_t hwcap[restrict static 1], + uint32_t hwcap2[restrict static 1]); + + CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + uint32_t features, + uint32_t features2, + uint32_t midr, + uint32_t architecture_version, + uint32_t architecture_flags, + const struct cpuinfo_arm_chipset chipset[restrict static 1], + struct cpuinfo_arm_isa isa[restrict static 1]); +#elif CPUINFO_ARCH_ARM64 + CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval( + uint32_t hwcap[restrict static 1], + uint32_t hwcap2[restrict static 1]); + + CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( + uint32_t features, + uint32_t features2, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[restrict static 1], + struct cpuinfo_arm_isa isa[restrict static 1]); +#endif + +#ifdef __ANDROID__ + CPUINFO_INTERNAL struct 
cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset( + const struct cpuinfo_android_properties properties[restrict static 1], + uint32_t cores, + uint32_t max_cpu_freq_max); +#else + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_linux_decode_chipset( + const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + const char revision[restrict static CPUINFO_REVISION_VALUE_MAX], + uint32_t cores, + uint32_t max_cpu_freq_max); +#endif + +CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware( + const char proc_cpuinfo_hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + uint32_t cores, uint32_t max_cpu_freq_max, bool is_tegra); + +#ifdef __ANDROID__ + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset_from_ro_product_board( + const char ro_product_board[restrict static CPUINFO_BUILD_PROP_VALUE_MAX], + uint32_t cores, uint32_t max_cpu_freq_max); + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset_from_ro_board_platform( + const char ro_board_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX], + uint32_t cores, uint32_t max_cpu_freq_max); + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform( + const char ro_mediatek_platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]); + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset_from_ro_arch( + const char ro_arch[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]); + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset_from_ro_chipname( + const char ro_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]); + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_android_decode_chipset_from_ro_hardware_chipname( + const char ro_hardware_chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]); +#else + CPUINFO_INTERNAL struct cpuinfo_arm_chipset + cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_revision( + const char proc_cpuinfo_revision[restrict static CPUINFO_REVISION_VALUE_MAX]); +#endif + +CPUINFO_INTERNAL bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( + uint32_t usable_processors, + uint32_t max_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); + +CPUINFO_INTERNAL void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( + uint32_t max_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); + +CPUINFO_INTERNAL void cpuinfo_arm_linux_count_cluster_processors( + uint32_t max_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); + +CPUINFO_INTERNAL uint32_t cpuinfo_arm_linux_detect_cluster_midr( + const struct cpuinfo_arm_chipset chipset[restrict static 1], + uint32_t max_processors, + uint32_t usable_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]); + +extern CPUINFO_INTERNAL const uint32_t* cpuinfo_linux_cpu_to_uarch_index_map; +extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_to_uarch_index_map_entries; diff --git a/dep/cpuinfo/src/arm/linux/chipset.c b/dep/cpuinfo/src/arm/linux/chipset.c new file mode 100644 index 000000000..e36283c55 --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/chipset.c @@ -0,0 +1,3860 @@ +#include +#include +#include +#include + +#include +#ifdef __ANDROID__ + #include +#endif +#include +#include + + +static inline bool is_ascii_whitespace(char c) { + switch (c) { + case ' ': + case '\t': 
+ case '\r': + case '\n': + return true; + default: + return false; + } +} + +static inline bool is_ascii_alphabetic(char c) { + const char lower_c = c | '\x20'; + return (uint8_t) (lower_c - 'a') <= (uint8_t) ('z' - 'a'); +} + +static inline bool is_ascii_alphabetic_uppercase(char c) { + return (uint8_t) (c - 'A') <= (uint8_t) ('Z' - 'A'); +} + +static inline bool is_ascii_numeric(char c) { + return (uint8_t) (c - '0') < 10; +} + +static inline uint16_t load_u16le(const void* ptr) { +#if defined(__ARM_ARCH_7A__) || defined(__aarch64__) + return *((const uint16_t*) ptr); +#else + const uint8_t* byte_ptr = (const uint8_t*) ptr; + return ((uint16_t) byte_ptr[1] << 8) | (uint16_t) byte_ptr[0]; +#endif +} + +static inline uint32_t load_u24le(const void* ptr) { +#if defined(__ARM_ARCH_7A__) || defined(__aarch64__) + return ((uint32_t) ((const uint8_t*) ptr)[2] << 16) | ((uint32_t) *((const uint16_t*) ptr)); +#else + const uint8_t* byte_ptr = (const uint8_t*) ptr; + return ((uint32_t) byte_ptr[2] << 16) | ((uint32_t) byte_ptr[1] << 8) | (uint32_t) byte_ptr[0]; +#endif +} + +static inline uint32_t load_u32le(const void* ptr) { +#if defined(__ARM_ARCH_7A__) || defined(__aarch64__) + return *((const uint32_t*) ptr); +#else + return ((uint32_t) ((const uint8_t*) ptr)[3] << 24) | load_u24le(ptr); +#endif +} + +/* + * Map from ARM chipset series ID to ARM chipset vendor ID. + * This map is used to avoid storing vendor IDs in tables. + */ +static enum cpuinfo_arm_chipset_vendor chipset_series_vendor[cpuinfo_arm_chipset_series_max] = { + [cpuinfo_arm_chipset_series_unknown] = cpuinfo_arm_chipset_vendor_unknown, + [cpuinfo_arm_chipset_series_qualcomm_qsd] = cpuinfo_arm_chipset_vendor_qualcomm, + [cpuinfo_arm_chipset_series_qualcomm_msm] = cpuinfo_arm_chipset_vendor_qualcomm, + [cpuinfo_arm_chipset_series_qualcomm_apq] = cpuinfo_arm_chipset_vendor_qualcomm, + [cpuinfo_arm_chipset_series_qualcomm_snapdragon] = cpuinfo_arm_chipset_vendor_qualcomm, + [cpuinfo_arm_chipset_series_mediatek_mt] = cpuinfo_arm_chipset_vendor_mediatek, + [cpuinfo_arm_chipset_series_samsung_exynos] = cpuinfo_arm_chipset_vendor_samsung, + [cpuinfo_arm_chipset_series_hisilicon_k3v] = cpuinfo_arm_chipset_vendor_hisilicon, + [cpuinfo_arm_chipset_series_hisilicon_hi] = cpuinfo_arm_chipset_vendor_hisilicon, + [cpuinfo_arm_chipset_series_hisilicon_kirin] = cpuinfo_arm_chipset_vendor_hisilicon, + [cpuinfo_arm_chipset_series_actions_atm] = cpuinfo_arm_chipset_vendor_actions, + [cpuinfo_arm_chipset_series_allwinner_a] = cpuinfo_arm_chipset_vendor_allwinner, + [cpuinfo_arm_chipset_series_amlogic_aml] = cpuinfo_arm_chipset_vendor_amlogic, + [cpuinfo_arm_chipset_series_amlogic_s] = cpuinfo_arm_chipset_vendor_amlogic, + [cpuinfo_arm_chipset_series_broadcom_bcm] = cpuinfo_arm_chipset_vendor_broadcom, + [cpuinfo_arm_chipset_series_lg_nuclun] = cpuinfo_arm_chipset_vendor_lg, + [cpuinfo_arm_chipset_series_leadcore_lc] = cpuinfo_arm_chipset_vendor_leadcore, + [cpuinfo_arm_chipset_series_marvell_pxa] = cpuinfo_arm_chipset_vendor_marvell, + [cpuinfo_arm_chipset_series_mstar_6a] = cpuinfo_arm_chipset_vendor_mstar, + [cpuinfo_arm_chipset_series_novathor_u] = cpuinfo_arm_chipset_vendor_novathor, + [cpuinfo_arm_chipset_series_nvidia_tegra_t] = cpuinfo_arm_chipset_vendor_nvidia, + [cpuinfo_arm_chipset_series_nvidia_tegra_ap] = cpuinfo_arm_chipset_vendor_nvidia, + [cpuinfo_arm_chipset_series_nvidia_tegra_sl] = cpuinfo_arm_chipset_vendor_nvidia, + [cpuinfo_arm_chipset_series_pinecone_surge_s] = cpuinfo_arm_chipset_vendor_pinecone, + 
[cpuinfo_arm_chipset_series_renesas_mp] = cpuinfo_arm_chipset_vendor_renesas, + [cpuinfo_arm_chipset_series_rockchip_rk] = cpuinfo_arm_chipset_vendor_rockchip, + [cpuinfo_arm_chipset_series_spreadtrum_sc] = cpuinfo_arm_chipset_vendor_spreadtrum, + [cpuinfo_arm_chipset_series_telechips_tcc] = cpuinfo_arm_chipset_vendor_telechips, + [cpuinfo_arm_chipset_series_texas_instruments_omap] = cpuinfo_arm_chipset_vendor_texas_instruments, + [cpuinfo_arm_chipset_series_wondermedia_wm] = cpuinfo_arm_chipset_vendor_wondermedia, +}; + +/** + * Tries to match /(MSM|APQ)\d{4}([A-Z\-]*)/ signature (case-insensitive) for Qualcomm MSM and APQ chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, ro.board.platform + * or ro.chipname) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, ro.board.platform or + * ro.chipname) to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_msm_apq( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect at least 7 symbols: 3 symbols "MSM" or "APQ" + 4 digits */ + if (start + 7 > end) { + return false; + } + + /* Check that string starts with "MSM" or "APQ", case-insensitive. + * The first three characters are loaded as 24-bit little endian word, binary ORed with 0x20 to convert to lower + * case, and compared to "MSM" and "APQ" strings as integers. + */ + const uint32_t series_signature = UINT32_C(0x00202020) | load_u24le(start); + enum cpuinfo_arm_chipset_series series; + switch (series_signature) { + case UINT32_C(0x6D736D): /* "msm" = reverse("msm") */ + series = cpuinfo_arm_chipset_series_qualcomm_msm; + break; + case UINT32_C(0x717061): /* "qpa" = reverse("apq") */ + series = cpuinfo_arm_chipset_series_qualcomm_apq; + break; + default: + return false; + } + + /* Sometimes there is a space ' ' following the MSM/APQ series */ + const char* pos = start + 3; + if (*pos == ' ') { + pos++; + + /* Expect at least 4 more symbols (4-digit model number) */ + if (pos + 4 > end) { + return false; + } + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 0; i < 4; i++) { + const uint32_t digit = (uint32_t) (uint8_t) (*pos++) - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Suffix is optional, so if we got to this point, parsing is successful. Commit parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_qualcomm, + .series = series, + .model = model, + }; + + /* Parse as many suffix characters as match the pattern [A-Za-z\-] */ + for (uint32_t i = 0; i < CPUINFO_ARM_CHIPSET_SUFFIX_MAX; i++) { + if (pos + i == end) { + break; + } + + const char c = pos[i]; + if (is_ascii_alphabetic(c)) { + /* Matched a letter [A-Za-z] */ + chipset->suffix[i] = c & '\xDF'; + } else if (c == '-') { + /* Matched a dash '-' */ + chipset->suffix[i] = c; + } else { + /* Neither of [A-Za-z\-] */ + break; + } + } + return true; +} + +/** + * Tries to match /SDM\d{3}$/ signature for Qualcomm Snapdragon chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. 
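/*
 * Illustrative sketch, not part of the vendored sources: the case-insensitive
 * prefix test used by match_msm_apq above. The first three bytes are read as a
 * little-endian 24-bit word, OR-ed with 0x20 per byte to force lower case, and
 * compared to the integer encoding of "msm".
 */
#include <assert.h>
#include <stdint.h>

static uint32_t load_u24le_portable(const char* p) {
	const uint8_t* b = (const uint8_t*) p;
	return ((uint32_t) b[2] << 16) | ((uint32_t) b[1] << 8) | (uint32_t) b[0];
}

int main(void) {
	const char* hardware = "MSM8998"; /* hypothetical /proc/cpuinfo Hardware value */
	const uint32_t signature = UINT32_C(0x00202020) | load_u24le_portable(hardware);
	assert(signature == UINT32_C(0x006D736D)); /* "msm" as a little-endian word */
	return 0;
}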
+ * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_sdm( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 6 symbols: 3 symbols "SDM" + 3 digits */ + if (start + 6 != end) { + return false; + } + + /* Check that string starts with "SDM". + * The first three characters are loaded and compared as 24-bit little endian word. + */ + const uint32_t expected_sdm = load_u24le(start); + if (expected_sdm != UINT32_C(0x004D4453) /* "MDS" = reverse("SDM") */) { + return false; + } + + /* Validate and parse 3-digit model number */ + uint32_t model = 0; + for (uint32_t i = 3; i < 6; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_qualcomm, + .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = model, + }; + return true; +} + +/** + * Tries to match /SM\d{4}$/ signature for Qualcomm Snapdragon chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_sm( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 6 symbols: 2 symbols "SM" + 4 digits */ + if (start + 6 != end) { + return false; + } + + /* Check that string starts with "SM". + * The first three characters are loaded and compared as 16-bit little endian word. + */ + const uint32_t expected_sm = load_u16le(start); + if (expected_sm != UINT16_C(0x4D53) /* "MS" = reverse("SM") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 2; i < 6; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_qualcomm, + .series = cpuinfo_arm_chipset_series_qualcomm_snapdragon, + .model = model, + }; + return true; +} + +/** + * Tries to match /Samsung Exynos\d{4}$/ signature (case-insensitive) for Samsung Exynos chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. 
+ */ +static bool match_samsung_exynos( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* + * Expect at 18-19 symbols: + * - "Samsung" (7 symbols) + space + "Exynos" (6 symbols) + optional space 4-digit model number + */ + const size_t length = end - start; + switch (length) { + case 18: + case 19: + break; + default: + return false; + } + + /* + * Check that the string starts with "samsung exynos", case-insensitive. + * Blocks of 4 characters are loaded and compared as little-endian 32-bit word. + * Case-insensitive characters are binary ORed with 0x20 to convert them to lowercase. + */ + const uint32_t expected_sams = UINT32_C(0x20202000) | load_u32le(start); + if (expected_sams != UINT32_C(0x736D6153) /* "smaS" = reverse("Sams") */) { + return false; + } + const uint32_t expected_ung = UINT32_C(0x00202020) | load_u32le(start + 4); + if (expected_ung != UINT32_C(0x20676E75) /* " ung" = reverse("ung ") */) { + return false; + } + const uint32_t expected_exyn = UINT32_C(0x20202000) | load_u32le(start + 8); + if (expected_exyn != UINT32_C(0x6E797845) /* "nyxE" = reverse("Exyn") */) { + return false; + } + const uint16_t expected_os = UINT16_C(0x2020) | load_u16le(start + 12); + if (expected_os != UINT16_C(0x736F) /* "so" = reverse("os") */) { + return false; + } + + const char* pos = start + 14; + + /* There can be a space ' ' following the "Exynos" string */ + if (*pos == ' ') { + pos++; + + /* If optional space if present, we expect exactly 19 characters */ + if (length != 19) { + return false; + } + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 0; i < 4; i++) { + const uint32_t digit = (uint32_t) (uint8_t) (*pos++) - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_samsung, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = model, + }; + return true; +} + +/** + * Tries to match /exynos\d{4}$/ signature for Samsung Exynos chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (ro.board.platform or ro.chipname) to match. + * @param end - end of the platform identifier (ro.board.platform or ro.chipname) to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_exynos( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 10 symbols: "exynos" (6 symbols) + 4-digit model number */ + if (start + 10 != end) { + return false; + } + + /* Load first 4 bytes as little endian 32-bit word */ + const uint32_t expected_exyn = load_u32le(start); + if (expected_exyn != UINT32_C(0x6E797865) /* "nyxe" = reverse("exyn") */ ) { + return false; + } + + /* Load next 2 bytes as little endian 16-bit word */ + const uint16_t expected_os = load_u16le(start + 4); + if (expected_os != UINT16_C(0x736F) /* "so" = reverse("os") */ ) { + return false; + } + + /* Check and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 6; i < 10; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. 
*/ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_samsung, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = model, + }; + return true; +} + +/** + * Tries to match /universal\d{4}$/ signature for Samsung Exynos chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board or ro.chipname) + * to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board or ro.chipname) + * to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_universal( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 13 symbols: "universal" (9 symbols) + 4-digit model number */ + if (start + 13 != end) { + return false; + } + + /* + * Check that the string starts with "universal". + * Blocks of 4 characters are loaded and compared as little-endian 32-bit word. + * Case-insensitive characters are binary ORed with 0x20 to convert them to lowercase. + */ + const uint8_t expected_u = UINT8_C(0x20) | (uint8_t) start[0]; + if (expected_u != UINT8_C(0x75) /* "u" */) { + return false; + } + const uint32_t expected_nive = UINT32_C(0x20202020) | load_u32le(start + 1); + if (expected_nive != UINT32_C(0x6576696E) /* "evin" = reverse("nive") */ ) { + return false; + } + const uint32_t expected_ersa = UINT32_C(0x20202020) | load_u32le(start + 5); + if (expected_ersa != UINT32_C(0x6C617372) /* "lasr" = reverse("rsal") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 9; i < 13; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_samsung, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = model, + }; + return true; +} + +/** + * Compares, case insensitively, a string to known values "SMDK4210" and "SMDK4x12" for Samsung Exynos chipsets. + * If platform identifier matches one of the SMDK* values, extracts model information into \p chipset argument. + * For "SMDK4x12" match, decodes the chipset name using number of cores. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string or ro.product.board) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string or ro.product.board) to match. + * @param cores - number of cores in the chipset. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_and_parse_smdk( + const char* start, const char* end, uint32_t cores, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 8 symbols: "SMDK" (4 symbols) + 4-digit model number */ + if (start + 8 != end) { + return false; + } + + /* + * Check that string starts with "MT" (case-insensitive). + * The first four characters are loaded as a 32-bit little endian word and converted to lowercase. 
+ */ + const uint32_t expected_smdk = UINT32_C(0x20202020) | load_u32le(start); + if (expected_smdk != UINT32_C(0x6B646D73) /* "kdms" = reverse("smdk") */) { + return false; + } + + /* + * Check that string ends with "4210" or "4x12". + * The last four characters are loaded and compared as a 32-bit little endian word. + */ + uint32_t model = 0; + const uint32_t expected_model = load_u32le(start + 4); + switch (expected_model) { + case UINT32_C(0x30313234): /* "0124" = reverse("4210") */ + model = 4210; + break; + case UINT32_C(0x32317834): /* "21x4" = reverse("4x12") */ + switch (cores) { + case 2: + model = 4212; + break; + case 4: + model = 4412; + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core Exynos 4x12 chipset", cores); + } + } + + if (model == 0) { + return false; + } + + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_samsung, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = model, + }; + return true; +} + +/** + * Tries to match /MTK?\d{4}[A-Z/]*$/ signature for MediaTek MT chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, ro.board.platform, + * ro.mediatek.platform, or ro.chipname) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, ro.board.platform, + * ro.mediatek.platform, or ro.chipname) to match. + * @param match_end - indicates if the function should attempt to match through the end of the string and fail if there + * are unparsed characters in the end, or match only MTK signature, model number, and some of the + * suffix characters (the ones that pass validation). + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_mt( + const char* start, const char* end, bool match_end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect at least 6 symbols: "MT" (2 symbols) + 4-digit model number */ + if (start + 6 > end) { + return false; + } + + /* + * Check that string starts with "MT" (case-insensitive). + * The first two characters are loaded as 16-bit little endian word and converted to lowercase. + */ + const uint16_t mt = UINT16_C(0x2020) | load_u16le(start); + if (mt != UINT16_C(0x746D) /* "tm" */) { + return false; + } + + + /* Some images report "MTK" rather than "MT" */ + const char* pos = start + 2; + if (((uint8_t) *pos | UINT8_C(0x20)) == (uint8_t) 'k') { + pos++; + + /* Expect 4 more symbols after "MTK" (4-digit model number) */ + if (pos + 4 > end) { + return false; + } + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 0; i < 4; i++) { + const uint32_t digit = (uint32_t) (uint8_t) (*pos++) - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Record parsed chipset. This implicitly zeroes-out suffix, which will be parsed later. 
*/ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_mediatek, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = model, + }; + + if (match_end) { + /* Check that the potential suffix does not exceed maximum length */ + const size_t suffix_length = end - pos; + if (suffix_length > CPUINFO_ARM_CHIPSET_SUFFIX_MAX) { + return false; + } + + /* Validate suffix characters and copy them to chipset structure */ + for (size_t i = 0; i < suffix_length; i++) { + const char c = (*pos++); + if (is_ascii_alphabetic(c)) { + /* Matched a letter [A-Za-z], convert to uppercase */ + chipset->suffix[i] = c & '\xDF'; + } else if (c == '/') { + /* Matched a slash '/' */ + chipset->suffix[i] = c; + } else { + /* Invalid suffix character (neither of [A-Za-z/]) */ + return false; + } + } + } else { + /* Validate and parse as many suffix characters as we can */ + for (size_t i = 0; i < CPUINFO_ARM_CHIPSET_SUFFIX_MAX; i++) { + if (pos + i == end) { + break; + } + + const char c = pos[i]; + if (is_ascii_alphabetic(c)) { + /* Matched a letter [A-Za-z], convert to uppercase */ + chipset->suffix[i] = c & '\xDF'; + } else if (c == '/') { + /* Matched a slash '/' */ + chipset->suffix[i] = c; + } else { + /* Invalid suffix character (neither of [A-Za-z/]). This marks the end of the suffix. */ + break; + } + } + } + /* All suffix characters successfully validated and copied to chipset data */ + return true; +} + +/** + * Tries to match /[Kk]irin\s?\d{3}$/ signature for HiSilicon Kirin chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_kirin( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect 8-9 symbols: "Kirin" (5 symbols) + optional whitespace (1 symbol) + 3-digit model number */ + const size_t length = end - start; + switch (length) { + case 8: + case 9: + break; + default: + return false; + } + + /* Check that the string starts with "Kirin" or "kirin". */ + if (((uint8_t) start[0] | UINT8_C(0x20)) != (uint8_t) 'k') { + return false; + } + /* Symbols 1-5 are loaded and compared as little-endian 32-bit word. */ + const uint32_t irin = load_u32le(start + 1); + if (irin != UINT32_C(0x6E697269) /* "niri" = reverse("irin") */) { + return false; + } + + /* Check for optional whitespace after "Kirin" */ + if (is_ascii_whitespace(start[5])) { + /* When whitespace is present after "Kirin", expect 9 symbols total */ + if (length != 9) { + return false; + } + } + + /* Validate and parse 3-digit model number */ + uint32_t model = 0; + for (int32_t i = 0; i < 3; i++) { + const uint32_t digit = (uint32_t) (uint8_t) end[i - 3] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* + * Thats it, return parsed chipset. + * Technically, Kirin 910T has a suffix, but it never appears in the form of "910T" string. + * Instead, Kirin 910T devices report "hi6620oem" string (handled outside of this function). 
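/*
 * Illustrative sketch, not part of the vendored sources: the ASCII case tricks
 * these matchers rely on. ASCII upper- and lower-case letters differ only in
 * bit 5, so OR-ing with 0x20 lower-cases a letter and AND-ing with 0xDF
 * upper-cases it, exactly as in the `| '\x20'` and `& '\xDF'` expressions above.
 */
#include <assert.h>

int main(void) {
	assert(('K' | '\x20') == 'k'); /* lower-case, as in the "Kirin"/"kirin" check */
	assert(('t' & '\xDF') == 'T'); /* upper-case, as in the suffix normalization */
	return 0;
}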
+ */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_hisilicon, + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = model, + }; + return true; +} + +/** + * Tries to match /rk\d{4}[a-z]?$/ signature for Rockchip RK chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string or ro.board.platform) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string or ro.board.platform) to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_rk( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect 6-7 symbols: "RK" (2 symbols) + 4-digit model number + optional 1-letter suffix */ + const size_t length = end - start; + switch (length) { + case 6: + case 7: + break; + default: + return false; + } + + /* + * Check that string starts with "RK" (case-insensitive). + * The first two characters are loaded as 16-bit little endian word and converted to lowercase. + */ + const uint16_t expected_rk = UINT16_C(0x2020) | load_u16le(start); + if (expected_rk != UINT16_C(0x6B72) /* "kr" = reverse("rk") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 2; i < 6; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Parse optional suffix */ + char suffix = 0; + if (length == 7) { + /* Parse the suffix letter */ + const char c = start[6]; + if (is_ascii_alphabetic(c)) { + /* Convert to upper case */ + suffix = c & '\xDF'; + } else { + /* Invalid suffix character */ + return false; + } + } + + /* Return parsed chipset */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_rockchip, + .series = cpuinfo_arm_chipset_series_rockchip_rk, + .model = model, + .suffix = { + [0] = suffix, + }, + }; + return true; +} + +/** + * Tries to match, case-insentitively, /s[cp]\d{4}[a-z]*|scx15$/ signature for Spreadtrum SC chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, + * ro.board.platform, or ro.chipname) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board, + * ro.board.platform, or ro.chipname) to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_sc( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect at least 5 symbols: "scx15" */ + if (start + 5 > end) { + return false; + } + + /* + * Check that string starts with "S[CP]" (case-insensitive). + * The first two characters are loaded as 16-bit little endian word and converted to lowercase. 
+ */ + const uint16_t expected_sc_or_sp = UINT16_C(0x2020) | load_u16le(start); + switch (expected_sc_or_sp) { + case UINT16_C(0x6373): /* "cs" = reverse("sc") */ + case UINT16_C(0x7073): /* "ps" = reverse("sp") */ + break; + default: + return false; + } + + /* Special case: "scx" prefix (SC7715 reported as "scx15") */ + if ((start[2] | '\x20') == 'x') { + /* Expect exactly 5 characters: "scx15" */ + if (start + 5 != end) { + return false; + } + + /* Check that string ends with "15" */ + const uint16_t expected_15 = load_u16le(start + 3); + if (expected_15 != UINT16_C(0x3531) /* "51" = reverse("15") */ ) { + return false; + } + + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_spreadtrum, + .series = cpuinfo_arm_chipset_series_spreadtrum_sc, + .model = 7715, + }; + return true; + } + + /* Expect at least 6 symbols: "S[CP]" (2 symbols) + 4-digit model number */ + if (start + 6 > end) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 2; i < 6; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Write parsed chipset */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_spreadtrum, + .series = cpuinfo_arm_chipset_series_spreadtrum_sc, + .model = model, + }; + + /* Validate and copy suffix letters. If suffix is too long, truncate at CPUINFO_ARM_CHIPSET_SUFFIX_MAX letters. */ + const char* suffix = start + 6; + for (size_t i = 0; i < CPUINFO_ARM_CHIPSET_SUFFIX_MAX; i++) { + if (suffix + i == end) { + break; + } + + const char c = suffix[i]; + if (!is_ascii_alphabetic(c)) { + /* Invalid suffix character */ + return false; + } + /* Convert suffix letter to uppercase */ + chipset->suffix[i] = c & '\xDF'; + } + return true; +} + +/** + * Tries to match /lc\d{4}[a-z]?$/ signature for Leadcore LC chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (ro.product.board or ro.board.platform) to match. + * @param end - end of the platform identifier (ro.product.board or ro.board.platform) to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_lc( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect at 6-7 symbols: "lc" (2 symbols) + 4-digit model number + optional 1-letter suffix */ + const size_t length = end - start; + switch (length) { + case 6: + case 7: + break; + default: + return false; + } + + /* Check that string starts with "lc". 
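The model-number loops in these matchers validate each character with a single unsigned comparison: casting to uint8_t and subtracting '0' makes characters below '0' wrap around to large values, so one "digit >= 10" test rejects everything outside '0'..'9'. A standalone sketch of the same idiom:

#include <stdbool.h>
#include <stdint.h>

/* Parse a fixed-width run of decimal digits using the matchers' idiom:
 * (uint32_t) (uint8_t) c - '0' wraps around for characters below '0', so a single
 * unsigned comparison rejects anything that is not '0'..'9'. */
static bool parse_decimal(const char* s, uint32_t count, uint32_t* out) {
	uint32_t value = 0;
	for (uint32_t i = 0; i < count; i++) {
		const uint32_t digit = (uint32_t) (uint8_t) s[i] - '0';
		if (digit >= 10) {
			return false; /* not a digit */
		}
		value = value * 10 + digit;
	}
	*out = value;
	return true;
}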
The first two characters are loaded as 16-bit little endian word */ + const uint16_t expected_lc = load_u16le(start); + if (expected_lc != UINT16_C(0x636C) /* "cl" = reverse("lc") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 2; i < 6; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Parse optional suffix letter */ + char suffix = 0; + if (length == 7) { + const char c = start[6]; + if (is_ascii_alphabetic(c)) { + /* Convert to uppercase */ + chipset->suffix[0] = c & '\xDF'; + } else { + /* Invalid suffix character */ + return false; + } + } + + /* Return parsed chipset */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_leadcore, + .series = cpuinfo_arm_chipset_series_leadcore_lc, + .model = model, + .suffix = { + [0] = suffix, + }, + }; + return true; +} + +/** + * Tries to match /PXA(\d{3,4}|1L88)$/ signature for Marvell PXA chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware string, ro.product.board or ro.chipname) + * to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardaware string, ro.product.board or ro.chipname) to + * match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_pxa( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect 6-7 symbols: "PXA" (3 symbols) + 3-4 digit model number */ + const size_t length = end - start; + switch (length) { + case 6: + case 7: + break; + default: + return false; + } + + /* Check that the string starts with "PXA". Symbols 1-3 are loaded and compared as little-endian 16-bit word. */ + if (start[0] != 'P') { + return false; + } + const uint16_t expected_xa = load_u16le(start + 1); + if (expected_xa != UINT16_C(0x4158) /* "AX" = reverse("XA") */) { + return false; + } + + uint32_t model = 0; + + + /* Check for a very common typo: "PXA1L88" for "PXA1088" */ + if (length == 7) { + /* Load 4 model "number" symbols as a little endian 32-bit word and compare to "1L88" */ + const uint32_t expected_1L88 = load_u32le(start + 3); + if (expected_1L88 == UINT32_C(0x38384C31) /* "88L1" = reverse("1L88") */) { + model = 1088; + goto write_chipset; + } + } + + /* Check and parse 3-4 digit model number */ + for (uint32_t i = 3; i < length; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ +write_chipset: + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_marvell, + .series = cpuinfo_arm_chipset_series_marvell_pxa, + .model = model, + }; + return true; +} + +/** + * Tries to match /BCM\d{4}$/ signature for Broadcom BCM chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. 
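A hedged usage sketch for match_pxa above; the function is static to this file, so a fragment like this would live in the same translation unit or in a test that includes it. It illustrates the deliberate normalization of the common "PXA1L88" misspelling:

	struct cpuinfo_arm_chipset chipset;
	const char pxa1088[] = "PXA1088";
	const char pxa1l88[] = "PXA1L88"; /* frequent misspelling of "PXA1088", handled above */
	if (match_pxa(pxa1088, pxa1088 + strlen(pxa1088), &chipset) &&
	    match_pxa(pxa1l88, pxa1l88 + strlen(pxa1l88), &chipset))
	{
		/* both calls succeed and report Marvell PXA, model 1088 */
	}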
+ */ +static bool match_bcm( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 7 symbols: "BCM" (3 symbols) + 4-digit model number */ + if (start + 7 != end) { + return false; + } + + /* Check that the string starts with "BCM". + * The first three characters are loaded and compared as a 24-bit little endian word. + */ + const uint32_t expected_bcm = load_u24le(start); + if (expected_bcm != UINT32_C(0x004D4342) /* "MCB" = reverse("BCM") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 3; i < 7; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = model, + }; + return true; +} + +/** + * Tries to match /OMAP\d{4}$/ signature for Texas Instruments OMAP chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_omap( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 8 symbols: "OMAP" (4 symbols) + 4-digit model number */ + if (start + 8 != end) { + return false; + } + + /* Check that the string starts with "OMAP". Symbols 0-4 are loaded and compared as little-endian 32-bit word. */ + const uint32_t expected_omap = load_u32le(start); + if (expected_omap != UINT32_C(0x50414D4F) /* "PAMO" = reverse("OMAP") */) { + return false; + } + + /* Validate and parse 4-digit model number */ + uint32_t model = 0; + for (uint32_t i = 4; i < 8; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Return parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_texas_instruments, + .series = cpuinfo_arm_chipset_series_texas_instruments_omap, + .model = model, + }; + return true; +} + +/** + * Compares platform identifier string to known values for Broadcom chipsets. + * If the string matches one of the known values, the function decodes Broadcom chipset from frequency and number of + * cores into \p chipset argument. + * + * @param start - start of the platform identifier (ro.product.board or ro.board.platform) to match. + * @param end - end of the platform identifier (ro.product.board or ro.board.platform) to match. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * @param[out] chipset - location where chipset information will be stored upon a successful match and decoding. + * + * @returns true if signature matched (even if exact model can't be decoded), false otherwise. 
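These Broadcom platform strings do not identify a model on their own, so the core count and the maximum cpufreq reading act as tie-breakers. A hedged, illustrative fragment for the function documented here (match_and_parse_broadcom is static to this file; its body follows below):

	struct cpuinfo_arm_chipset chipset;
	const char hawaii[] = "hawaii";
	if (match_and_parse_broadcom(hawaii, hawaii + 6, 2 /* cores */, 1200000 /* KHz */, &chipset)) {
		/* decodes to BCM21664T: two cores at 1.2 GHz select the 'T' suffix */
	}
	if (match_and_parse_broadcom(hawaii, hawaii + 6, 1 /* core */, 999999 /* KHz */, &chipset)) {
		/* decodes to BCM21663: single-core "hawaii" boards map to this model */
	}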
+ */ +static bool match_and_parse_broadcom( + const char* start, const char* end, uint32_t cores, uint32_t max_cpu_freq_max, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect 4-6 symbols: "java" (4 symbols), "rhea" (4 symbols), "capri" (5 symbols), or "hawaii" (6 symbols) */ + const size_t length = end - start; + switch (length) { + case 4: + case 5: + case 6: + break; + default: + return false; + } + + /* + * Compare the platform identifier to known values for Broadcom chipsets: + * - "rhea" + * - "java" + * - "capri" + * - "hawaii" + * Upon a successful match, decode chipset name from frequency and number of cores. + */ + uint32_t model = 0; + char suffix = 0; + const uint32_t expected_platform = load_u32le(start); + switch (expected_platform) { + case UINT32_C(0x61656872): /* "aehr" = reverse("rhea") */ + if (length == 4) { + /* + * Detected "rhea" platform: + * - 1 core @ 849999 KHz -> BCM21654 + * - 1 core @ 999999 KHz -> BCM21654G + */ + if (cores == 1) { + model = 21654; + if (max_cpu_freq_max >= 999999) { + suffix = 'G'; + } + } + } + break; + case UINT32_C(0x6176616A): /* "avaj" = reverse("java") */ + if (length == 4) { + /* + * Detected "java" platform: + * - 4 cores -> BCM23550 + */ + if (cores == 4) { + model = 23550; + } + } + break; + case UINT32_C(0x61776168): /* "awah" = reverse("hawa") */ + if (length == 6) { + /* Check that string equals "hawaii" */ + const uint16_t expected_ii = load_u16le(start + 4); + if (expected_ii == UINT16_C(0x6969) /* "ii" */ ) { + /* + * Detected "hawaii" platform: + * - 1 core -> BCM21663 + * - 2 cores @ 999999 KHz -> BCM21664 + * - 2 cores @ 1200000 KHz -> BCM21664T + */ + switch (cores) { + case 1: + model = 21663; + break; + case 2: + model = 21664; + if (max_cpu_freq_max >= 1200000) { + suffix = 'T'; + } + break; + } + } + } + break; + case UINT32_C(0x72706163): /* "rpac" = reverse("capr") */ + if (length == 5) { + /* Check that string equals "capri" */ + if (start[4] == 'i') { + /* + * Detected "capri" platform: + * - 2 cores -> BCM28155 + */ + if (cores == 2) { + model = 28155; + } + } + } + break; + } + + if (model != 0) { + /* Chipset was successfully decoded */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_broadcom, + .series = cpuinfo_arm_chipset_series_broadcom_bcm, + .model = model, + .suffix = { + [0] = suffix, + }, + }; + } + return model != 0; +} + +struct sunxi_map_entry { + uint8_t sunxi; + uint8_t cores; + uint8_t model; + char suffix; +}; + +static const struct sunxi_map_entry sunxi_map_entries[] = { +#if CPUINFO_ARCH_ARM + { + /* ("sun4i", 1) -> "A10" */ + .sunxi = 4, + .cores = 1, + .model = 10, + }, + { + /* ("sun5i", 1) -> "A13" */ + .sunxi = 5, + .cores = 1, + .model = 13, + }, + { + /* ("sun6i", 4) -> "A31" */ + .sunxi = 6, + .cores = 4, + .model = 31, + }, + { + /* ("sun7i", 2) -> "A20" */ + .sunxi = 7, + .cores = 2, + .model = 20, + + }, + { + /* ("sun8i", 2) -> "A23" */ + .sunxi = 8, + .cores = 2, + .model = 23, + }, + { + /* ("sun8i", 4) -> "A33" */ + .sunxi = 8, + .cores = 4, + .model = 33, + }, + { + /* ("sun8i", 8) -> "A83T" */ + .sunxi = 8, + .cores = 8, + .model = 83, + .suffix = 'T', + }, + { + /* ("sun9i", 8) -> "A80" */ + .sunxi = 9, + .cores = 8, + .model = 80, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* ("sun50i", 4) -> "A64" */ + .sunxi = 50, + .cores = 4, + .model = 64, + }, +}; + +/** + * Tries to match /proc/cpuinfo Hardware string to Allwinner /sun\d+i/ signature. 
+ * If the string matches signature, the function decodes Allwinner chipset from the number in the signature and the + * number of cores, and stores it in \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param cores - number of cores in the chipset. + * @param[out] chipset - location where chipset information will be stored upon a successful match and decoding. + * + * @returns true if signature matched (even if exact model can't be decoded), false otherwise. + */ +static bool match_and_parse_sunxi( + const char* start, const char* end, uint32_t cores, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect at least 5 symbols: "sun" (3 symbols) + platform id (1-2 digits) + "i" (1 symbol) */ + if (start + 5 > end) { + return false; + } + + /* Compare the first 3 characters to "sun" */ + if (start[0] != 's') { + return false; + } + const uint16_t expected_un = load_u16le(start + 1); + if (expected_un != UINT16_C(0x6E75) /* "nu" = reverse("un") */) { + return false; + } + + /* Check and parse the first (required) digit of the sunXi platform id */ + uint32_t sunxi_platform = 0; + { + const uint32_t digit = (uint32_t) (uint8_t) start[3] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + sunxi_platform = digit; + } + + /* Parse optional second digit of the sunXi platform id */ + const char* pos = start + 4; + { + const uint32_t digit = (uint32_t) (uint8_t) (*pos) - '0'; + if (digit < 10) { + sunxi_platform = sunxi_platform * 10 + digit; + if (++pos == end) { + /* Expected one more character, final 'i' letter */ + return false; + } + } + } + + /* Validate the final 'i' letter */ + if (*pos != 'i') { + return false; + } + + /* Compare sunXi platform id and number of cores to tabluted values to decode chipset name */ + uint32_t model = 0; + char suffix = 0; + for (size_t i = 0; i < CPUINFO_COUNT_OF(sunxi_map_entries); i++) { + if (sunxi_platform == sunxi_map_entries[i].sunxi && cores == sunxi_map_entries[i].cores) { + model = sunxi_map_entries[i].model; + suffix = sunxi_map_entries[i].suffix; + break; + } + } + + if (model == 0) { + cpuinfo_log_info("unrecognized %"PRIu32"-core Allwinner sun%"PRIu32" platform", cores, sunxi_platform); + } + /* Create chipset name from decoded data */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_allwinner, + .series = cpuinfo_arm_chipset_series_allwinner_a, + .model = model, + .suffix = { + [0] = suffix, + }, + }; + return true; +} + +/** + * Compares /proc/cpuinfo Hardware string to "WMT" signature. + * If the string matches signature, the function decodes WonderMedia chipset from frequency and number of cores into + * \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * @param[out] chipset - location where chipset information will be stored upon a successful match and decoding. + * + * @returns true if signature matched (even if exact model can't be decoded), false otherwise. 
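A hedged usage sketch for match_and_parse_sunxi above: the sunXi platform number is ambiguous by itself, so the core count selects the entry from sunxi_map_entries (the 32-bit-only entries such as A33 and A83T are guarded by CPUINFO_ARCH_ARM):

	struct cpuinfo_arm_chipset chipset;
	const char sun8i[] = "sun8i";
	if (match_and_parse_sunxi(sun8i, sun8i + 5, 4 /* cores */, &chipset)) {
		/* ("sun8i", 4 cores) -> Allwinner A33 on 32-bit ARM builds */
	}
	if (match_and_parse_sunxi(sun8i, sun8i + 5, 8 /* cores */, &chipset)) {
		/* ("sun8i", 8 cores) -> Allwinner A83T (model 83, suffix 'T') */
	}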
+ */ +static bool match_and_parse_wmt( + const char* start, const char* end, uint32_t cores, uint32_t max_cpu_freq_max, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expected 3 symbols: "WMT" */ + if (start + 3 != end) { + return false; + } + + /* Compare string to "WMT" */ + if (start[0] != 'W') { + return false; + } + const uint16_t expected_mt = load_u16le(start + 1); + if (expected_mt != UINT16_C(0x544D) /* "TM" = reverse("MT") */) { + return false; + } + + /* Decode chipset name from frequency and number of cores */ + uint32_t model = 0; + switch (cores) { + case 1: + switch (max_cpu_freq_max) { + case 1008000: + /* 1 core @ 1008000 KHz -> WM8950 */ + model = 8950; + break; + case 1200000: + /* 1 core @ 1200000 KHz -> WM8850 */ + model = 8850; + break; + } + break; + case 2: + if (max_cpu_freq_max == 1500000) { + /* 2 cores @ 1500000 KHz -> WM8880 */ + model = 8880; + } + break; + } + + if (model == 0) { + cpuinfo_log_info("unrecognized WonderMedia platform with %"PRIu32" cores at %"PRIu32" KHz", + cores, max_cpu_freq_max); + } + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_wondermedia, + .series = cpuinfo_arm_chipset_series_wondermedia_wm, + .model = model, + }; + return true; +} + +struct huawei_map_entry { + uint32_t platform; + uint32_t model; +}; + +static const struct huawei_map_entry huawei_platform_map[] = { + { + /* "ALP" -> Kirin 970 */ + .platform = UINT32_C(0x00504C41), /* "\0PLA" = reverse("ALP\0") */ + .model = 970, + }, + { + /* "BAC" -> Kirin 659 */ + .platform = UINT32_C(0x00434142), /* "\0CAB" = reverse("BAC\0") */ + .model = 659, + }, + { + /* "BLA" -> Kirin 970 */ + .platform = UINT32_C(0x00414C42), /* "\0ALB" = reverse("BLA\0") */ + .model = 970, + }, + { + /* "BKL" -> Kirin 970 */ + .platform = UINT32_C(0x004C4B42), /* "\0LKB" = reverse("BKL\0") */ + .model = 970, + }, + { + /* "CLT" -> Kirin 970 */ + .platform = UINT32_C(0x00544C43), /* "\0TLC" = reverse("CLT\0") */ + .model = 970, + }, + { + /* "COL" -> Kirin 970 */ + .platform = UINT32_C(0x004C4F43), /* "\0LOC" = reverse("COL\0") */ + .model = 970, + }, + { + /* "COR" -> Kirin 970 */ + .platform = UINT32_C(0x00524F43), /* "\0ROC" = reverse("COR\0") */ + .model = 970, + }, + { + /* "DUK" -> Kirin 960 */ + .platform = UINT32_C(0x004B5544), /* "\0KUD" = reverse("DUK\0") */ + .model = 960, + }, + { + /* "EML" -> Kirin 970 */ + .platform = UINT32_C(0x004C4D45), /* "\0LME" = reverse("EML\0") */ + .model = 970, + }, + { + /* "EVA" -> Kirin 955 */ + .platform = UINT32_C(0x00415645), /* "\0AVE" = reverse("EVA\0") */ + .model = 955, + }, + { + /* "FRD" -> Kirin 950 */ + .platform = UINT32_C(0x00445246), /* "\0DRF" = reverse("FRD\0") */ + .model = 950, + }, + { + /* "INE" -> Kirin 710 */ + .platform = UINT32_C(0x00454E49), /* "\0ENI" = reverse("INE\0") */ + .model = 710, + }, + { + /* "KNT" -> Kirin 950 */ + .platform = UINT32_C(0x00544E4B), /* "\0TNK" = reverse("KNT\0") */ + .model = 950, + }, + { + /* "LON" -> Kirin 960 */ + .platform = UINT32_C(0x004E4F4C), /* "\0NOL" = reverse("LON\0") */ + .model = 960, + }, + { + /* "LYA" -> Kirin 980 */ + .platform = UINT32_C(0x0041594C), /* "\0AYL" = reverse("LYA\0") */ + .model = 980, + }, + { + /* "MCN" -> Kirin 980 */ + .platform = UINT32_C(0x004E434D), /* "\0NCM" = reverse("MCN\0") */ + .model = 980, + }, + { + /* "MHA" -> Kirin 960 */ + .platform = UINT32_C(0x0041484D), /* "\0AHM" = reverse("MHA\0") */ + .model = 960, + }, + { + /* "NEO" -> Kirin 970 */ + .platform = UINT32_C(0x004F454E), /* "\0OEN" = reverse("NEO\0") */ 
+ .model = 970, + }, + { + /* "NXT" -> Kirin 950 */ + .platform = UINT32_C(0x0054584E), /* "\0TXN" = reverse("NXT\0") */ + .model = 950, + }, + { + /* "PAN" -> Kirin 980 */ + .platform = UINT32_C(0x004E4150), /* "\0NAP" = reverse("PAN\0") */ + .model = 980, + }, + { + /* "PAR" -> Kirin 970 */ + .platform = UINT32_C(0x00524150), /* "\0RAP" = reverse("PAR\0") */ + .model = 970, + }, + { + /* "RVL" -> Kirin 970 */ + .platform = UINT32_C(0x004C5652), /* "\0LVR" = reverse("RVL\0") */ + .model = 970, + }, + { + /* "STF" -> Kirin 960 */ + .platform = UINT32_C(0x00465453), /* "\0FTS" = reverse("STF\0") */ + .model = 960, + }, + { + /* "SUE" -> Kirin 980 */ + .platform = UINT32_C(0x00455553), /* "\0EUS" = reverse("SUE\0") */ + .model = 980, + }, + { + /* "VIE" -> Kirin 955 */ + .platform = UINT32_C(0x00454956), /* "\0EIV" = reverse("VIE\0") */ + .model = 955, + }, + { + /* "VKY" -> Kirin 960 */ + .platform = UINT32_C(0x00594B56), /* "\0YKV" = reverse("VKY\0") */ + .model = 960, + }, + { + /* "VTR" -> Kirin 960 */ + .platform = UINT32_C(0x00525456), /* "\0RTV" = reverse("VTR\0") */ + .model = 960, + }, +}; + +/** + * Tries to match ro.product.board string to Huawei /([A-Z]{3})(\-[A-Z]?L\d{2})$/ signature where \1 is one of the + * known values for Huawei devices, which do not report chipset name elsewhere. + * If the string matches signature, the function decodes chipset (always HiSilicon Kirin for matched devices) from + * the Huawei platform ID in the signature and stores it in \p chipset argument. + * + * @param start - start of the ro.product.board string to match. + * @param end - end of the ro.product.board string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match and decoding. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_and_parse_huawei( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* + * Expect length of either 3, 7 or 8, exactly: + * - 3-letter platform identifier (see huawei_platform_map) + * - 3-letter platform identifier + '-' + 'L' + two digits + * - 3-letter platform identifier + '-' + capital letter + 'L' + two digits + */ + const size_t length = end - start; + switch (length) { + case 3: + case 7: + case 8: + break; + default: + return false; + } + + /* + * Try to find the first three-letter substring in among the tabulated entries for Huawei devices. + * The first three letters are loaded and compared as a little-endian 24-bit word. + */ + uint32_t model = 0; + const uint32_t target_platform_id = load_u24le(start); + for (uint32_t i = 0; i < CPUINFO_COUNT_OF(huawei_platform_map); i++) { + if (huawei_platform_map[i].platform == target_platform_id) { + model = huawei_platform_map[i].model; + break; + } + } + + if (model == 0) { + /* Platform does not match the tabulated Huawei entries */ + return false; + } + + if (length > 3) { + /* + * Check that: + * - The symbol after platform id is a dash + * - The symbol after it is an uppercase letter. For 7-symbol strings, the symbol is just 'L'. 
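The .platform constants in huawei_platform_map above pack the first three letters of the board name into the low 24 bits of a little-endian word, which is what the load_u24le call below produces. A standalone sketch of the packing; load_u24le_sketch stands in for the helper assumed from cpuinfo's internal headers:

#include <stdint.h>

/* Pack three bytes into bits 0-23, lowest address in the least significant byte. */
static uint32_t load_u24le_sketch(const char* s) {
	return (uint32_t) (uint8_t) s[0] |
	       ((uint32_t) (uint8_t) s[1] << 8) |
	       ((uint32_t) (uint8_t) s[2] << 16);
}

/* "ALP" packs to 0x00504C41 ('A' = 0x41 in the low byte, 'P' = 0x50 in bits 16-23),
 * which is exactly the .platform value of the Kirin 970 entry at the top of the table. */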
+ */ + if (start[3] != '-' || !is_ascii_alphabetic_uppercase(start[4])) { + return false; + } + + /* Check that the last 3 entries are /L\d\d/ */ + if (end[-3] != 'L' || !is_ascii_numeric(end[-2]) || !is_ascii_numeric(end[-1])) { + return false; + } + } + + /* All checks succeeded, commit chipset name */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_hisilicon, + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = model, + }; + return true; +} + +/** + * Tries to match /tcc\d{3}x$/ signature for Telechips TCCXXXx chipsets. + * If match successful, extracts model information into \p chipset argument. + * + * @param start - start of the /proc/cpuinfo Hardware string to match. + * @param end - end of the /proc/cpuinfo Hardware string to match. + * @param[out] chipset - location where chipset information will be stored upon a successful match. + * + * @returns true if signature matched, false otherwise. + */ +static bool match_tcc( + const char* start, const char* end, + struct cpuinfo_arm_chipset chipset[restrict static 1]) +{ + /* Expect exactly 7 symbols: "tcc" (3 symbols) + 3-digit model number + fixed "x" suffix */ + if (start + 7 != end) { + return false; + } + + /* Quick check for the first character */ + if (start[0] != 't') { + return false; + } + + /* Load the next 2 bytes as little endian 16-bit word */ + const uint16_t expected_cc = load_u16le(start + 1); + if (expected_cc != UINT16_C(0x6363) /* "cc" */ ) { + return false; + } + + /* Check and parse 3-digit model number */ + uint32_t model = 0; + for (uint32_t i = 3; i < 6; i++) { + const uint32_t digit = (uint32_t) (uint8_t) start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + /* Check the fixed 'x' suffix in the end */ + if (start[6] != 'x') { + return false; + } + + /* Commit parsed chipset. */ + *chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_telechips, + .series = cpuinfo_arm_chipset_series_telechips_tcc, + .model = model, + .suffix = { + [0] = 'X' + }, + }; + return true; +} + +/* + * Compares ro.board.platform string to Nvidia Tegra signatures ("tegra" and "tegra3") + * This check has effect on how /proc/cpuinfo Hardware string is interpreted. + * + * @param start - start of the ro.board.platform string to check. + * @param end - end of the ro.board.platform string to check. 
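A hedged usage sketch for the two matchers completed above; both are static to this file, so calls like these belong in the same translation unit or a test that includes it:

	struct cpuinfo_arm_chipset chipset;
	const char board[] = "CLT-L29"; /* three-letter Huawei platform code plus region suffix */
	if (match_and_parse_huawei(board, board + strlen(board), &chipset)) {
		/* decodes to HiSilicon Kirin 970 via the "CLT" entry in huawei_platform_map */
	}
	const char hw[] = "tcc893x";
	if (match_tcc(hw, hw + strlen(hw), &chipset)) {
		/* decodes to Telechips TCC893X: 3-digit model 893 plus the fixed 'X' suffix */
	}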
+ * + * @returns true if the string matches an Nvidia Tegra signature, and false otherwise + */ +static bool is_tegra(const char* start, const char* end) { + /* Expect 5 ("tegra") or 6 ("tegra3") symbols */ + const size_t length = end - start; + switch (length) { + case 5: + case 6: + break; + default: + return false; + } + + /* Check that the first 5 characters match "tegra" */ + if (start[0] != 't') { + return false; + } + const uint32_t expected_egra = load_u32le(start + 1); + if (expected_egra != UINT32_C(0x61726765) /* "arge" = reverse("egra") */) { + return false; + } + + /* Check if the string is either "tegra" (length = 5) or "tegra3" (length != 5) and last character is '3' */ + return (length == 5 || start[5] == '3'); +} + +struct special_map_entry { + const char* platform; + uint16_t model; + uint8_t series; + char suffix; +}; + +static const struct special_map_entry special_hardware_map_entries[] = { +#if CPUINFO_ARCH_ARM + { + /* "k3v2oem1" -> HiSilicon K3V2 */ + .platform = "k3v2oem1", + .series = cpuinfo_arm_chipset_series_hisilicon_k3v, + .model = 2, + }, + { + /* "hi6620oem" -> HiSilicon Kirin 910T */ + .platform = "hi6620oem", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 910, + .suffix = 'T' + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "hi6250" -> HiSilicon Kirin 650 */ + .platform = "hi6250", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 650, + }, + { + /* "hi6210sft" -> HiSilicon Kirin 620 */ + .platform = "hi6210sft", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 620, + }, + { + /* "hi3751" -> HiSilicon Hi3751 */ + .platform = "hi3751", + .series = cpuinfo_arm_chipset_series_hisilicon_hi, + .model = 3751, + }, +#if CPUINFO_ARCH_ARM + { + /* "hi3630" -> HiSilicon Kirin 920 */ + .platform = "hi3630", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 920, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "hi3635" -> HiSilicon Kirin 930 */ + .platform = "hi3635", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 930, + }, +#if CPUINFO_ARCH_ARM + { + /* "gs702a" -> Actions ATM7029 (Cortex-A5 + GC1000) */ + .platform = "gs702a", + .series = cpuinfo_arm_chipset_series_actions_atm, + .model = 7029, + }, + { + /* "gs702c" -> Actions ATM7029B (Cortex-A5 + SGX540) */ + .platform = "gs702c", + .series = cpuinfo_arm_chipset_series_actions_atm, + .model = 7029, + .suffix = 'B', + }, + { + /* "gs703d" -> Actions ATM7039S */ + .platform = "gs703d", + .series = cpuinfo_arm_chipset_series_actions_atm, + .model = 7039, + .suffix = 'S', + }, + { + /* "gs705a" -> Actions ATM7059A */ + .platform = "gs705a", + .series = cpuinfo_arm_chipset_series_actions_atm, + .model = 7059, + .suffix = 'A', + }, + { + /* "Amlogic Meson8" -> Amlogic S812 */ + .platform = "Amlogic Meson8", + .series = cpuinfo_arm_chipset_series_amlogic_s, + .model = 812, + }, + { + /* "Amlogic Meson8B" -> Amlogic S805 */ + .platform = "Amlogic Meson8B", + .series = cpuinfo_arm_chipset_series_amlogic_s, + .model = 805, + }, + { + /* "mapphone_CDMA" -> Texas Instruments OMAP4430 */ + .platform = "mapphone_CDMA", + .series = cpuinfo_arm_chipset_series_texas_instruments_omap, + .model = 4430, + }, + { + /* "Superior" -> Texas Instruments OMAP4470 */ + .platform = "Superior", + .series = cpuinfo_arm_chipset_series_texas_instruments_omap, + .model = 4470, + }, + { + /* "Tuna" (Samsung Galaxy Nexus) -> Texas Instruments OMAP4460 */ + .platform = "Tuna", + .series = cpuinfo_arm_chipset_series_texas_instruments_omap, + .model = 4460, + }, + { 
+ /* "Manta" (Samsung Nexus 10) -> Samsung Exynos 5250 */ + .platform = "Manta", + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = 5250, + }, + { + /* "Odin" -> LG Nuclun 7111 */ + .platform = "Odin", + .series = cpuinfo_arm_chipset_series_lg_nuclun, + .model = 7111, + }, + { + /* "Madison" -> MStar 6A338 */ + .platform = "Madison", + .series = cpuinfo_arm_chipset_series_mstar_6a, + .model = 338, + }, +#endif /* CPUINFO_ARCH_ARM */ +}; + +static const struct special_map_entry tegra_hardware_map_entries[] = { +#if CPUINFO_ARCH_ARM + { + /* "cardhu" (Nvidia Cardhu developer tablet) -> Tegra T30 */ + .platform = "cardhu", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "kai" -> Tegra T30L */ + .platform = "kai", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "p3" (Samsung Galaxy Tab 8.9) -> Tegra T20 */ + .platform = "p3", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 20, + }, + { + /* "n1" (Samsung Galaxy R / Samsung Captivate Glide) -> Tegra AP20H */ + .platform = "n1", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 20, + .suffix = 'H', + }, + { + /* "SHW-M380S" (Samsung Galaxy Tab 10.1) -> Tegra T20 */ + .platform = "SHW-M380S", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 20, + }, + { + /* "m470" (Hisense Sero 7 Pro) -> Tegra T30L */ + .platform = "m470", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "endeavoru" (HTC One X) -> Tegra AP33 */ + .platform = "endeavoru", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 33, + }, + { + /* "evitareul" (HTC One X+) -> Tegra T33 */ + .platform = "evitareul", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 33, + }, + { + /* "enrc2b" (HTC One X+) -> Tegra T33 */ + .platform = "enrc2b", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 33, + }, + { + /* "mozart" (Asus Transformer Pad TF701T) -> Tegra T114 */ + .platform = "mozart", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "tegratab" (Tegra Note 7) -> Tegra T114 */ + .platform = "tegratab", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "tn8" (Nvidia Shield Tablet K1) -> Tegra T124 */ + .platform = "tn8", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 124, + }, + { + /* "roth" (Nvidia Shield Portable) -> Tegra T114 */ + .platform = "roth", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "pisces" (Xiaomi Mi 3) -> Tegra T114 */ + .platform = "pisces", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "mocha" (Xiaomi Mi Pad) -> Tegra T124 */ + .platform = "mocha", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 124, + }, + { + /* "stingray" (Motorola XOOM) -> Tegra AP20H */ + .platform = "stingray", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 20, + .suffix = 'H', + }, + { + /* "Ceres" (Wiko Highway 4G) -> Tegra SL460N */ + .platform = "Ceres", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_sl, + .model = 460, + .suffix = 'N', + }, + { + /* "MT799" (nabi 2 Tablet) -> Tegra T30 */ + .platform = "MT799", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "t8400n" (nabi DreamTab HD8) -> Tegra T114 */ + .platform = "t8400n", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 
114, + }, + { + /* "chagall" (Fujitsu Stylistic M532) -> Tegra T30 */ + .platform = "chagall", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "ventana" (Asus Transformer TF101) -> Tegra T20 */ + .platform = "ventana", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 20, + }, + { + /* "bobsleigh" (Fujitsu Arrows Tab F-05E) -> Tegra T33 */ + .platform = "bobsleigh", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 33, + }, + { + /* "tegra_fjdev101" (Fujitsu Arrows X F-10D) -> Tegra AP33 */ + .platform = "tegra_fjdev101", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 33, + }, + { + /* "tegra_fjdev103" (Fujitsu Arrows V F-04E) -> Tegra T33 */ + .platform = "tegra_fjdev103", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 33, + }, + { + /* "nbx03" (Sony Tablet S) -> Tegra T20 */ + .platform = "nbx03", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 20, + }, + { + /* "txs03" (Sony Xperia Tablet S) -> Tegra T30L */ + .platform = "txs03", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "x3" (LG Optimus 4X HD P880) -> Tegra AP33 */ + .platform = "x3", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 33, + }, + { + /* "vu10" (LG Optimus Vu P895) -> Tegra AP33 */ + .platform = "vu10", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 33, + }, + { + /* "BIRCH" (HP Slate 7 Plus) -> Tegra T30L */ + .platform = "BIRCH", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "macallan" (HP Slate 8 Pro) -> Tegra T114 */ + .platform = "macallan", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "maya" (HP SlateBook 10 x2) -> Tegra T114 */ + .platform = "maya", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "antares" (Toshiba AT100) -> Tegra T20 */ + .platform = "antares", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 20, + }, + { + /* "tostab12AL" (Toshiba AT300SE "Excite 10 SE") -> Tegra T30L */ + .platform = "tostab12AL", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "tostab12BL" (Toshiba AT10-A "Excite Pure") -> Tegra T30L */ + .platform = "tostab12BL", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "sphinx" (Toshiba AT270 "Excite 7.7") -> Tegra T30 */ + .platform = "sphinx", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "tostab11BS" (Toshiba AT570 "Regza 7.7") -> Tegra T30 */ + .platform = "tostab11BS", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "tostab12BA" (Toshiba AT10-LE-A "Excite Pro") -> Tegra T114 */ + .platform = "tostab12BA", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "vangogh" (Acer Iconia Tab A100) -> Tegra T20 */ + .platform = "vangogh", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 20, + }, + { + /* "a110" (Acer Iconia Tab A110) -> Tegra T30L */ + .platform = "a110", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "picasso_e" (Acer Iconia Tab A200) -> Tegra AP20H */ + .platform = "picasso_e", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 20, + .suffix = 'H', + }, + { + /* "picasso_e2" (Acer Iconia Tab A210) -> Tegra T30L */ 
+ .platform = "picasso_e2", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "picasso" (Acer Iconia Tab A500) -> Tegra AP20H */ + .platform = "picasso", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_ap, + .model = 20, + .suffix = 'H', + }, + { + /* "picasso_m" (Acer Iconia Tab A510) -> Tegra T30 */ + .platform = "picasso_m", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "picasso_mf" (Acer Iconia Tab A700) -> Tegra T30 */ + .platform = "picasso_mf", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "avalon" (Toshiba AT300 "Excite 10") -> Tegra T30L */ + .platform = "avalon", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "NS_14T004" (iRiver NS-14T004) -> Tegra T30L */ + .platform = "NS_14T004", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, + { + /* "WIKIPAD" (Wikipad) -> Tegra T30 */ + .platform = "WIKIPAD", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + }, + { + /* "kb" (Pegatron Q00Q) -> Tegra T114 */ + .platform = "kb", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "foster_e" (Nvidia Shield TV, Flash) -> Tegra T210 */ + .platform = "foster_e", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 210, + }, + { + /* "foster_e_hdd" (Nvidia Shield TV, HDD) -> Tegra T210 */ + .platform = "foster_e_hdd", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 210, + }, + { + /* "darcy" (Nvidia Shield TV 2017) -> Tegra T210 */ + .platform = "darcy", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 210, + }, +}; + +/* + * Decodes chipset name from /proc/cpuinfo Hardware string. + * For some chipsets, the function relies frequency and on number of cores for chipset detection. + * + * @param[in] platform - /proc/cpuinfo Hardware string. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. + */ +struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware( + const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + uint32_t cores, uint32_t max_cpu_freq_max, bool is_tegra) +{ + struct cpuinfo_arm_chipset chipset; + const size_t hardware_length = strnlen(hardware, CPUINFO_HARDWARE_VALUE_MAX); + const char* hardware_end = hardware + hardware_length; + + if (is_tegra) { + /* + * Nvidia Tegra-specific path: compare /proc/cpuinfo Hardware string to + * tabulated Hardware values for popular chipsets/devices with Tegra chipsets. + * This path is only used when ro.board.platform indicates a Tegra chipset + * (albeit does not indicate which exactly Tegra chipset). 
+ */ + for (size_t i = 0; i < CPUINFO_COUNT_OF(tegra_hardware_map_entries); i++) { + if (strncmp(tegra_hardware_map_entries[i].platform, hardware, hardware_length) == 0 && + tegra_hardware_map_entries[i].platform[hardware_length] == 0) + { + cpuinfo_log_debug( + "found /proc/cpuinfo Hardware string \"%.*s\" in Nvidia Tegra chipset table", + (int) hardware_length, hardware); + /* Create chipset name from entry */ + return (struct cpuinfo_arm_chipset) { + .vendor = chipset_series_vendor[tegra_hardware_map_entries[i].series], + .series = (enum cpuinfo_arm_chipset_series) tegra_hardware_map_entries[i].series, + .model = tegra_hardware_map_entries[i].model, + .suffix = { + [0] = tegra_hardware_map_entries[i].suffix, + }, + }; + } + } + } else { + /* Generic path: consider all other vendors */ + + bool word_start = true; + for (const char* pos = hardware; pos != hardware_end; pos++) { + const char c = *pos; + switch (c) { + case ' ': + case '\t': + case ',': + word_start = true; + break; + default: + if (word_start && is_ascii_alphabetic(c)) { + /* Check Qualcomm MSM/APQ signature */ + if (match_msm_apq(pos, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm MSM/APQ signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check SDMxxx (Qualcomm Snapdragon) signature */ + if (match_sdm(pos, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm SDM signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check SMxxxx (Qualcomm Snapdragon) signature */ + if (match_sm(pos, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm SM signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check MediaTek MT signature */ + if (match_mt(pos, hardware_end, true, &chipset)) { + cpuinfo_log_debug( + "matched MediaTek MT signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check HiSilicon Kirin signature */ + if (match_kirin(pos, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched HiSilicon Kirin signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check Rockchip RK signature */ + if (match_rk(pos, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Rockchip RK signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + } + word_start = false; + break; + } + } + + /* Check Samsung Exynos signature */ + if (match_samsung_exynos(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Samsung Exynos signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check universalXXXX (Samsung Exynos) signature */ + if (match_universal(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched UNIVERSAL (Samsung Exynos) signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Match /SMDK(4410|4x12)$/ */ + if (match_and_parse_smdk(hardware, hardware_end, cores, &chipset)) { + cpuinfo_log_debug( + "matched SMDK (Samsung Exynos) signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + #endif + + /* Check Spreadtrum SC signature */ + if (match_sc(hardware, hardware_end, &chipset)) 
{ + cpuinfo_log_debug( + "matched Spreadtrum SC signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Check Marvell PXA signature */ + if (match_pxa(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Marvell PXA signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + #endif + + /* Match /sun\d+i/ signature and map to Allwinner chipset name */ + if (match_and_parse_sunxi(hardware, hardware_end, cores, &chipset)) { + cpuinfo_log_debug( + "matched sunxi (Allwinner Ax) signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check Broadcom BCM signature */ + if (match_bcm(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Broadcom BCM signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Check Texas Instruments OMAP signature */ + if (match_omap(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Texas Instruments OMAP signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Check WonderMedia WMT signature and decode chipset from frequency and number of cores */ + if (match_and_parse_wmt(hardware, hardware_end, cores, max_cpu_freq_max, &chipset)) { + cpuinfo_log_debug( + "matched WonderMedia WMT signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + #endif + + /* Check Telechips TCC signature */ + if (match_tcc(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Telechips TCC signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) hardware_length, hardware); + return chipset; + } + + /* Compare to tabulated Hardware values for popular chipsets/devices which can't be otherwise detected */ + for (size_t i = 0; i < CPUINFO_COUNT_OF(special_hardware_map_entries); i++) { + if (strncmp(special_hardware_map_entries[i].platform, hardware, hardware_length) == 0 && + special_hardware_map_entries[i].platform[hardware_length] == 0) + { + cpuinfo_log_debug( + "found /proc/cpuinfo Hardware string \"%.*s\" in special chipset table", + (int) hardware_length, hardware); + /* Create chipset name from entry */ + return (struct cpuinfo_arm_chipset) { + .vendor = chipset_series_vendor[special_hardware_map_entries[i].series], + .series = (enum cpuinfo_arm_chipset_series) special_hardware_map_entries[i].series, + .model = special_hardware_map_entries[i].model, + .suffix = { + [0] = special_hardware_map_entries[i].suffix, + }, + }; + } + } + } + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; +} + +#ifdef __ANDROID__ + static const struct special_map_entry special_board_map_entries[] = { + { + /* "hi6250" -> HiSilicon Kirin 650 */ + .platform = "hi6250", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 650, + }, + { + /* "hi6210sft" -> HiSilicon Kirin 620 */ + .platform = "hi6210sft", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 620, + }, +#if CPUINFO_ARCH_ARM + { + /* "hi3630" -> HiSilicon Kirin 920 */ + .platform = "hi3630", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 920, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "hi3635" -> HiSilicon Kirin 930 */ + .platform = "hi3635", + 
.series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 930, + }, + { + /* "hi3650" -> HiSilicon Kirin 950 */ + .platform = "hi3650", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 950, + }, + { + /* "hi3660" -> HiSilicon Kirin 960 */ + .platform = "hi3660", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 960, + }, +#if CPUINFO_ARCH_ARM + { + /* "mp523x" -> Renesas MP5232 */ + .platform = "mp523x", + .series = cpuinfo_arm_chipset_series_renesas_mp, + .model = 5232, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "BEETHOVEN" (Huawei MadiaPad M3) -> HiSilicon Kirin 950 */ + .platform = "BEETHOVEN", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 950, + }, +#if CPUINFO_ARCH_ARM + { + /* "hws7701u" (Huawei MediaPad 7 Youth) -> Rockchip RK3168 */ + .platform = "hws7701u", + .series = cpuinfo_arm_chipset_series_rockchip_rk, + .model = 3168, + }, + { + /* "g2mv" (LG G2 mini LTE) -> Nvidia Tegra SL460N */ + .platform = "g2mv", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_sl, + .model = 460, + .suffix = 'N', + }, + { + /* "K00F" (Asus MeMO Pad 10) -> Rockchip RK3188 */ + .platform = "K00F", + .series = cpuinfo_arm_chipset_series_rockchip_rk, + .model = 3188, + }, + { + /* "T7H" (HP Slate 7) -> Rockchip RK3066 */ + .platform = "T7H", + .series = cpuinfo_arm_chipset_series_rockchip_rk, + .model = 3066, + }, + { + /* "tuna" (Samsung Galaxy Nexus) -> Texas Instruments OMAP4460 */ + .platform = "tuna", + .series = cpuinfo_arm_chipset_series_texas_instruments_omap, + .model = 4460, + }, + { + /* "grouper" (Asus Nexus 7 2012) -> Nvidia Tegra T30L */ + .platform = "grouper", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 30, + .suffix = 'L', + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "flounder" (HTC Nexus 9) -> Nvidia Tegra T132 */ + .platform = "flounder", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 132, + }, + { + /* "dragon" (Google Pixel C) -> Nvidia Tegra T210 */ + .platform = "dragon", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 210, + }, + { + /* "sailfish" (Google Pixel) -> Qualcomm MSM8996PRO */ + .platform = "sailfish", + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = 8996, + .suffix = 'P', + }, + { + /* "marlin" (Google Pixel XL) -> Qualcomm MSM8996PRO */ + .platform = "marlin", + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = 8996, + .suffix = 'P', + }, + }; + + /* + * Decodes chipset name from ro.product.board Android system property. + * For some chipsets, the function relies frequency and on number of cores for chipset detection. + * + * @param[in] platform - ro.product.board value. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. 
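A hedged, illustrative call for the decoder documented here; the core count and frequency arguments are placeholders, since they are only consulted on a few branches such as the Broadcom and SMDK paths. The Google Pixel reports ro.product.board "sailfish", which is resolved through special_board_map_entries above, and the truncated 'P' suffix stored in that table is expanded back to "PRO" by the lookup code further below:

	char board[CPUINFO_BUILD_PROP_VALUE_MAX] = "sailfish";
	struct cpuinfo_arm_chipset chipset =
		cpuinfo_arm_android_decode_chipset_from_ro_product_board(board, 4, 2150400 /* placeholders */);
	/* chipset: Qualcomm MSM8996PRO (vendor Qualcomm, series MSM, model 8996, suffix "PRO") */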
+ */ + struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_product_board( + const char ro_product_board[restrict static CPUINFO_BUILD_PROP_VALUE_MAX], + uint32_t cores, uint32_t max_cpu_freq_max) + { + struct cpuinfo_arm_chipset chipset; + const char* board = ro_product_board; + const size_t board_length = strnlen(ro_product_board, CPUINFO_BUILD_PROP_VALUE_MAX); + const char* board_end = ro_product_board + board_length; + + /* Check Qualcomm MSM/APQ signature */ + if (match_msm_apq(board, board_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm MSM/APQ signature in ro.product.board string \"%.*s\"", (int) board_length, board); + return chipset; + } + + /* Check universaXXXX (Samsung Exynos) signature */ + if (match_universal(board, board_end, &chipset)) { + cpuinfo_log_debug( + "matched UNIVERSAL (Samsung Exynos) signature in ro.product.board string \"%.*s\"", + (int) board_length, board); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Check SMDK (Samsung Exynos) signature */ + if (match_and_parse_smdk(board, board_end, cores, &chipset)) { + cpuinfo_log_debug( + "matched SMDK (Samsung Exynos) signature in ro.product.board string \"%.*s\"", + (int) board_length, board); + return chipset; + } + #endif + + /* Check MediaTek MT signature */ + if (match_mt(board, board_end, true, &chipset)) { + cpuinfo_log_debug( + "matched MediaTek MT signature in ro.product.board string \"%.*s\"", + (int) board_length, board); + return chipset; + } + + /* Check Spreadtrum SC signature */ + if (match_sc(board, board_end, &chipset)) { + cpuinfo_log_debug( + "matched Spreadtrum SC signature in ro.product.board string \"%.*s\"", + (int) board_length, board); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Check Marvell PXA signature */ + if (match_pxa(board, board_end, &chipset)) { + cpuinfo_log_debug( + "matched Marvell PXA signature in ro.product.board string \"%.*s\"", + (int) board_length, board); + return chipset; + } + + /* Check Leadcore LCxxxx signature */ + if (match_lc(board, board_end, &chipset)) { + cpuinfo_log_debug( + "matched Leadcore LC signature in ro.product.board string \"%.*s\"", + (int) board_length, board); + return chipset; + } + + /* + * Compare to tabulated ro.product.board values for Broadcom chipsets and decode chipset from frequency and + * number of cores. 
+ */ + if (match_and_parse_broadcom(board, board_end, cores, max_cpu_freq_max, &chipset)) { + cpuinfo_log_debug( + "found ro.product.board string \"%.*s\" in Broadcom chipset table", + (int) board_length, board); + return chipset; + } + #endif + + /* Compare to tabulated ro.product.board values for Huawei devices which don't report chipset elsewhere */ + if (match_and_parse_huawei(board, board_end, &chipset)) { + cpuinfo_log_debug( + "found ro.product.board string \"%.*s\" in Huawei chipset table", + (int) board_length, board); + return chipset; + } + + /* Compare to tabulated ro.product.board values for popular chipsets/devices which can't be otherwise detected */ + for (size_t i = 0; i < CPUINFO_COUNT_OF(special_board_map_entries); i++) { + if (strncmp(special_board_map_entries[i].platform, board, board_length) == 0 && + special_board_map_entries[i].platform[board_length] == 0) + { + cpuinfo_log_debug( + "found ro.product.board string \"%.*s\" in special chipset table", + (int) board_length, board); + /* Create chipset name from entry */ + return (struct cpuinfo_arm_chipset) { + .vendor = chipset_series_vendor[special_board_map_entries[i].series], + .series = (enum cpuinfo_arm_chipset_series) special_board_map_entries[i].series, + .model = special_board_map_entries[i].model, + .suffix = { + [0] = special_board_map_entries[i].suffix, + /* The suffix of MSM8996PRO is truncated at the first letter, reconstruct it here. */ + [1] = special_board_map_entries[i].suffix == 'P' ? 'R' : 0, + [2] = special_board_map_entries[i].suffix == 'P' ? 'O' : 0, + }, + }; + } + } + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; + } + + struct amlogic_map_entry { + char ro_board_platform[6]; + uint16_t model; + uint8_t series; + char suffix[3]; + }; + + static const struct amlogic_map_entry amlogic_map_entries[] = { +#if CPUINFO_ARCH_ARM + { + /* "meson3" -> Amlogic AML8726-M */ + .ro_board_platform = "meson3", + .series = cpuinfo_arm_chipset_series_amlogic_aml, + .model = 8726, + .suffix = "-M", + }, + { + /* "meson6" -> Amlogic AML8726-MX */ + .ro_board_platform = "meson6", + .series = cpuinfo_arm_chipset_series_amlogic_aml, + .model = 8726, + .suffix = "-MX", + }, + { + /* "meson8" -> Amlogic S805 */ + .ro_board_platform = "meson8", + .series = cpuinfo_arm_chipset_series_amlogic_s, + .model = 805, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "gxbaby" -> Amlogic S905 */ + .ro_board_platform = "gxbaby", + .series = cpuinfo_arm_chipset_series_amlogic_s, + .model = 905, + }, + { + /* "gxl" -> Amlogic S905X */ + .ro_board_platform = "gxl", + .series = cpuinfo_arm_chipset_series_amlogic_s, + .model = 905, + .suffix = "X", + }, + { + /* "gxm" -> Amlogic S912 */ + .ro_board_platform = "gxm", + .series = cpuinfo_arm_chipset_series_amlogic_s, + .model = 912, + }, + }; + + static const struct special_map_entry special_platform_map_entries[] = { +#if CPUINFO_ARCH_ARM + { + /* "hi6620oem" -> HiSilicon Kirin 910T */ + .platform = "hi6620oem", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 910, + .suffix = 'T', + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "hi6250" -> HiSilicon Kirin 650 */ + .platform = "hi6250", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 650, + }, + { + /* "hi6210sft" -> HiSilicon Kirin 620 */ + .platform = "hi6210sft", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 620, + }, +#if CPUINFO_ARCH_ARM + { + /* "hi3630" -> HiSilicon Kirin 920 */ + 
.platform = "hi3630", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 920, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "hi3635" -> HiSilicon Kirin 930 */ + .platform = "hi3635", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 930, + }, + { + /* "hi3650" -> HiSilicon Kirin 950 */ + .platform = "hi3650", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 950, + }, + { + /* "hi3660" -> HiSilicon Kirin 960 */ + .platform = "hi3660", + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = 960, + }, +#if CPUINFO_ARCH_ARM + { + /* "k3v2oem1" -> HiSilicon K3V2 */ + .platform = "k3v2oem1", + .series = cpuinfo_arm_chipset_series_hisilicon_k3v, + .model = 2, + }, + { + /* "k3v200" -> HiSilicon K3V2 */ + .platform = "k3v200", + .series = cpuinfo_arm_chipset_series_hisilicon_k3v, + .model = 2, + }, + { + /* "montblanc" -> NovaThor U8500 */ + .platform = "montblanc", + .series = cpuinfo_arm_chipset_series_novathor_u, + .model = 8500, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "song" -> Pinecone Surge S1 */ + .platform = "song", + .series = cpuinfo_arm_chipset_series_pinecone_surge_s, + .model = 1, + }, +#if CPUINFO_ARCH_ARM + { + /* "rk322x" -> RockChip RK3229 */ + .platform = "rk322x", + .series = cpuinfo_arm_chipset_series_rockchip_rk, + .model = 3229, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* "tegra132" -> Nvidia Tegra T132 */ + .platform = "tegra132", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 132, + }, + { + /* "tegra210_dragon" -> Nvidia Tegra T210 */ + .platform = "tegra210_dragon", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 210, + }, +#if CPUINFO_ARCH_ARM + { + /* "tegra4" -> Nvidia Tegra T114 */ + .platform = "tegra4", + .series = cpuinfo_arm_chipset_series_nvidia_tegra_t, + .model = 114, + }, + { + /* "s5pc110" -> Samsung Exynos 3110 */ + .platform = "s5pc110", + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = 3110, + }, +#endif /* CPUINFO_ARCH_ARM */ + }; + + /* + * Decodes chipset name from ro.board.platform Android system property. + * For some chipsets, the function relies frequency and on number of cores for chipset detection. + * + * @param[in] platform - ro.board.platform value. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. 
+ */ + struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_board_platform( + const char platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX], + uint32_t cores, uint32_t max_cpu_freq_max) + { + struct cpuinfo_arm_chipset chipset; + const size_t platform_length = strnlen(platform, CPUINFO_BUILD_PROP_VALUE_MAX); + const char* platform_end = platform + platform_length; + + /* Check Qualcomm MSM/APQ signature */ + if (match_msm_apq(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm MSM/APQ signature in ro.board.platform string \"%.*s\"", + (int) platform_length, platform); + return chipset; + } + + /* Check exynosXXXX (Samsung Exynos) signature */ + if (match_exynos(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "matched exynosXXXX (Samsung Exynos) signature in ro.board.platform string \"%.*s\"", + (int) platform_length, platform); + return chipset; + } + + /* Check MediaTek MT signature */ + if (match_mt(platform, platform_end, true, &chipset)) { + cpuinfo_log_debug( + "matched MediaTek MT signature in ro.board.platform string \"%.*s\"", (int) platform_length, platform); + return chipset; + } + + /* Check HiSilicon Kirin signature */ + if (match_kirin(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "matched HiSilicon Kirin signature in ro.board.platform string \"%.*s\"", (int) platform_length, platform); + return chipset; + } + + /* Check Spreadtrum SC signature */ + if (match_sc(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "matched Spreadtrum SC signature in ro.board.platform string \"%.*s\"", (int) platform_length, platform); + return chipset; + } + + /* Check Rockchip RK signature */ + if (match_rk(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "matched Rockchip RK signature in ro.board.platform string \"%.*s\"", (int) platform_length, platform); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Check Leadcore LCxxxx signature */ + if (match_lc(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "matched Leadcore LC signature in ro.board.platform string \"%.*s\"", (int) platform_length, platform); + return chipset; + } + #endif + + /* Compare to tabulated ro.board.platform values for Huawei devices which don't report chipset elsewhere */ + if (match_and_parse_huawei(platform, platform_end, &chipset)) { + cpuinfo_log_debug( + "found ro.board.platform string \"%.*s\" in Huawei chipset table", + (int) platform_length, platform); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* + * Compare to known ro.board.platform values for Broadcom devices and + * detect chipset from frequency and number of cores + */ + if (match_and_parse_broadcom(platform, platform_end, cores, max_cpu_freq_max, &chipset)) { + cpuinfo_log_debug( + "found ro.board.platform string \"%.*s\" in Broadcom chipset table", + (int) platform_length, platform); + return chipset; + } + + /* + * Compare to ro.board.platform value ("omap4") for OMAP4xxx chipsets. + * Upon successful match, detect OMAP4430 from frequency and number of cores. 
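
Each match_* signature check above follows the same shape: verify a literal prefix, then parse the trailing decimal digits into a model number. A minimal stand-alone sketch of that shape, with an illustrative toy_match_rk in place of the library's matchers (which operate on [start, end) pointer ranges and fill a struct cpuinfo_arm_chipset):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Toy stand-in for a signature matcher: accept strings of the form "rk" followed
 * by exactly 4 decimal digits (e.g. "rk3288") and parse the digits into a model. */
static bool toy_match_rk(const char* start, const char* end, unsigned* model_out) {
	const size_t length = (size_t) (end - start);
	if (length != 6 || start[0] != 'r' || start[1] != 'k') {
		return false;
	}
	unsigned model = 0;
	for (const char* p = start + 2; p != end; p++) {
		const unsigned digit = (unsigned) (*p - '0');
		if (digit >= 10) {
			return false; /* non-digit after the "rk" signature */
		}
		model = model * 10 + digit;
	}
	*model_out = model;
	return true;
}

int main(void) {
	const char platform[] = "rk3288";
	unsigned model;
	if (toy_match_rk(platform, platform + strlen(platform), &model)) {
		printf("Rockchip RK%u\n", model); /* prints: Rockchip RK3288 */
	}
	return 0;
}
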
+ */ + if (platform_length == 5 && cores == 2 && max_cpu_freq_max == 1008000 && memcmp(platform, "omap4", 5) == 0) { + cpuinfo_log_debug( + "matched Texas Instruments OMAP4 signature in ro.board.platform string \"%.*s\"", + (int) platform_length, platform); + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_texas_instruments, + .series = cpuinfo_arm_chipset_series_texas_instruments_omap, + .model = 4430, + }; + } + #endif + + /* + * Compare to tabulated ro.board.platform values for Amlogic chipsets/devices which can't be otherwise detected. + * The tabulated Amlogic ro.board.platform values have not more than 6 characters. + */ + if (platform_length <= 6) { + for (size_t i = 0; i < CPUINFO_COUNT_OF(amlogic_map_entries); i++) { + if (strncmp(amlogic_map_entries[i].ro_board_platform, platform, 6) == 0) { + cpuinfo_log_debug( + "found ro.board.platform string \"%.*s\" in Amlogic chipset table", + (int) platform_length, platform); + /* Create chipset name from entry */ + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_amlogic, + .series = (enum cpuinfo_arm_chipset_series) amlogic_map_entries[i].series, + .model = amlogic_map_entries[i].model, + .suffix = { + [0] = amlogic_map_entries[i].suffix[0], + [1] = amlogic_map_entries[i].suffix[1], + [2] = amlogic_map_entries[i].suffix[2], + }, + }; + } + } + } + + /* Compare to tabulated ro.board.platform values for popular chipsets/devices which can't be otherwise detected */ + for (size_t i = 0; i < CPUINFO_COUNT_OF(special_platform_map_entries); i++) { + if (strncmp(special_platform_map_entries[i].platform, platform, platform_length) == 0 && + special_platform_map_entries[i].platform[platform_length] == 0) + { + /* Create chipset name from entry */ + cpuinfo_log_debug( + "found ro.board.platform string \"%.*s\" in special chipset table", (int) platform_length, platform); + return (struct cpuinfo_arm_chipset) { + .vendor = chipset_series_vendor[special_platform_map_entries[i].series], + .series = (enum cpuinfo_arm_chipset_series) special_platform_map_entries[i].series, + .model = special_platform_map_entries[i].model, + .suffix = { + [0] = special_platform_map_entries[i].suffix, + }, + }; + } + } + + /* None of the ro.board.platform signatures matched, indicate unknown chipset */ + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; + } + + /* + * Decodes chipset name from ro.mediatek.platform Android system property. + * + * @param[in] platform - ro.mediatek.platform value. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` + * vendor and series identifiers. + */ + struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform( + const char platform[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]) + { + struct cpuinfo_arm_chipset chipset; + const char* platform_end = platform + strnlen(platform, CPUINFO_BUILD_PROP_VALUE_MAX); + + /* Check MediaTek MT signature */ + if (match_mt(platform, platform_end, false, &chipset)) { + return chipset; + } + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; + } + + + /* + * Decodes chipset name from ro.arch Android system property. + * + * The ro.arch property is matched only against Samsung Exynos signature. 
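
The Amlogic lookup above compares against char[6] entries with strncmp(..., 6) rather than strcmp because a 6-character name such as "gxbaby" exactly fills the array and is stored without a terminating NUL (C drops the terminator when a string literal exactly fills a character array). A small sketch of why the bounded compare is the right tool; toy_names and toy_find are illustrative stand-ins:

#include <stdio.h>
#include <string.h>

/* "gxbaby" has exactly 6 characters, so it fills name[6] with no NUL terminator;
 * "gxl" leaves the remaining bytes zeroed. strncmp with n = 6 handles both cases,
 * while strcmp on the unterminated entry could read past the array. */
static const char toy_names[][6] = { "gxbaby", "gxl" };

static int toy_find(const char* platform, size_t platform_length) {
	if (platform_length > 6) {
		return -1; /* cannot match any 6-byte entry */
	}
	for (size_t i = 0; i < sizeof(toy_names) / sizeof(toy_names[0]); i++) {
		if (strncmp(toy_names[i], platform, 6) == 0) {
			return (int) i;
		}
	}
	return -1;
}

int main(void) {
	printf("%d\n", toy_find("gxbaby", 6)); /* 0 */
	printf("%d\n", toy_find("gxl", 3));    /* 1 */
	printf("%d\n", toy_find("gx", 2));     /* -1: "gx" is not "gxl" */
	return 0;
}
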
Systems with other chipset rarely + * configure ro.arch Android system property, and can be decoded through other properties, but some Exynos + * chipsets are identified only in ro.arch. + * + * @param[in] arch - ro.arch value. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` + * vendor and series identifiers. + */ + struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_arch( + const char arch[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]) + { + struct cpuinfo_arm_chipset chipset; + const char* arch_end = arch + strnlen(arch, CPUINFO_BUILD_PROP_VALUE_MAX); + + /* Check Samsung exynosXXXX signature */ + if (match_exynos(arch, arch_end, &chipset)) { + return chipset; + } + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; + } + + /* + * Decodes chipset name from ro.chipname or ro.hardware.chipname Android system property. + * + * @param[in] chipname - ro.chipname or ro.hardware.chipname value. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. + */ + + struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset_from_ro_chipname( + const char chipname[restrict static CPUINFO_BUILD_PROP_VALUE_MAX]) + { + struct cpuinfo_arm_chipset chipset; + const size_t chipname_length = strnlen(chipname, CPUINFO_BUILD_PROP_VALUE_MAX); + const char* chipname_end = chipname + chipname_length; + + /* Check Qualcomm MSM/APQ signatures */ + if (match_msm_apq(chipname, chipname_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm MSM/APQ signature in ro.chipname string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + /* Check SMxxxx (Qualcomm Snapdragon) signature */ + if (match_sm(chipname, chipname_end, &chipset)) { + cpuinfo_log_debug( + "matched Qualcomm SM signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + /* Check exynosXXXX (Samsung Exynos) signature */ + if (match_exynos(chipname, chipname_end, &chipset)) { + cpuinfo_log_debug( + "matched exynosXXXX (Samsung Exynos) signature in ro.chipname string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + /* Check universalXXXX (Samsung Exynos) signature */ + if (match_universal(chipname, chipname_end, &chipset)) { + cpuinfo_log_debug( + "matched UNIVERSAL (Samsung Exynos) signature in ro.chipname Hardware string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + /* Check MediaTek MT signature */ + if (match_mt(chipname, chipname_end, true, &chipset)) { + cpuinfo_log_debug( + "matched MediaTek MT signature in ro.chipname string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + /* Check Spreadtrum SC signature */ + if (match_sc(chipname, chipname_end, &chipset)) { + cpuinfo_log_debug( + "matched Spreadtrum SC signature in ro.chipname string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + #if CPUINFO_ARCH_ARM + /* Check Marvell PXA signature */ + if (match_pxa(chipname, chipname_end, &chipset)) { + cpuinfo_log_debug( + "matched Marvell PXA signature in ro.chipname string \"%.*s\"", + (int) chipname_length, chipname); + return chipset; + } + + /* Compare to ro.chipname value ("mp523x") for Renesas MP5232 which can't be otherwise detected */ + if (chipname_length == 6 && memcmp(chipname, "mp523x", 6) == 0) { + 
cpuinfo_log_debug( + "matched Renesas MP5232 signature in ro.chipname string \"%.*s\"", + (int) chipname_length, chipname); + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_renesas, + .series = cpuinfo_arm_chipset_series_renesas_mp, + .model = 5232, + }; + } + #endif + + return (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; + } +#endif /* __ANDROID__ */ + +/* + * Fix common bugs, typos, and renames in chipset name. + * + * @param[in,out] chipset - chipset name to fix. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + */ +void cpuinfo_arm_fixup_chipset( + struct cpuinfo_arm_chipset chipset[restrict static 1], uint32_t cores, uint32_t max_cpu_freq_max) +{ + switch (chipset->series) { + case cpuinfo_arm_chipset_series_qualcomm_msm: + /* Check if there is suffix */ + if (chipset->suffix[0] == 0) { + /* No suffix, but the model may be misreported */ + switch (chipset->model) { + case 8216: + /* MSM8216 was renamed to MSM8916 */ + cpuinfo_log_info("reinterpreted MSM8216 chipset as MSM8916"); + chipset->model = 8916; + break; + case 8916: + /* Common bug: MSM8939 (Octa-core) reported as MSM8916 (Quad-core) */ + switch (cores) { + case 4: + break; + case 8: + cpuinfo_log_info("reinterpreted MSM8916 chipset with 8 cores as MSM8939"); + chipset->model = 8939; + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core MSM%"PRIu32" chipset", + cores, chipset->model); + chipset->model = 0; + } + break; + case 8937: + /* Common bug: MSM8917 (Quad-core) reported as MSM8937 (Octa-core) */ + switch (cores) { + case 4: + cpuinfo_log_info("reinterpreted MSM8937 chipset with 4 cores as MSM8917"); + chipset->model = 8917; + break; + case 8: + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core MSM%"PRIu32" chipset", + cores, chipset->model); + chipset->model = 0; + } + break; + case 8960: + /* Common bug: APQ8064 (Quad-core) reported as MSM8960 (Dual-core) */ + switch (cores) { + case 2: + break; + case 4: + cpuinfo_log_info("reinterpreted MSM8960 chipset with 4 cores as APQ8064"); + chipset->series = cpuinfo_arm_chipset_series_qualcomm_apq; + chipset->model = 8064; + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core MSM%"PRIu32" chipset", + cores, chipset->model); + chipset->model = 0; + } + break; + case 8996: + /* Common bug: MSM8994 (Octa-core) reported as MSM8996 (Quad-core) */ + switch (cores) { + case 4: + break; + case 8: + cpuinfo_log_info("reinterpreted MSM8996 chipset with 8 cores as MSM8994"); + chipset->model = 8994; + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core MSM%"PRIu32" chipset", + cores, chipset->model); + chipset->model = 0; + } + break; +#if CPUINFO_ARCH_ARM + case 8610: + /* Common bug: MSM8612 (Quad-core) reported as MSM8610 (Dual-core) */ + switch (cores) { + case 2: + break; + case 4: + cpuinfo_log_info("reinterpreted MSM8610 chipset with 4 cores as MSM8612"); + chipset->model = 8612; + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core MSM%"PRIu32" chipset", + cores, chipset->model); + chipset->model = 0; + } + break; +#endif /* CPUINFO_ARCH_ARM */ + } + } else { + /* Suffix may need correction */ + const uint32_t suffix_word = load_u32le(chipset->suffix); + if (suffix_word == UINT32_C(0x004D534D) /* "\0MSM" = reverse("MSM\0") 
*/) { + /* + * Common bug: model name repeated twice, e.g. "MSM8916MSM8916" + * In this case, model matching code parses the second "MSM" as a suffix + */ + chipset->suffix[0] = 0; + chipset->suffix[1] = 0; + chipset->suffix[2] = 0; + } else { + switch (chipset->model) { + case 8976: + /* MSM8976SG -> MSM8976PRO */ + if (suffix_word == UINT32_C(0x00004753) /* "\0\0GS" = reverse("SG\0\0") */ ) { + chipset->suffix[0] = 'P'; + chipset->suffix[1] = 'R'; + chipset->suffix[2] = 'O'; + } + break; + case 8996: + /* MSM8996PRO -> MSM8996PRO-AB or MSM8996PRO-AC */ + if (suffix_word == UINT32_C(0x004F5250) /* "\0ORP" = reverse("PRO\0") */ ) { + chipset->suffix[3] = '-'; + chipset->suffix[4] = 'A'; + chipset->suffix[5] = 'B' + (char) (max_cpu_freq_max >= 2188800); + } + break; + } + } + } + break; + case cpuinfo_arm_chipset_series_qualcomm_apq: + { + /* Suffix may need correction */ + const uint32_t expected_apq = load_u32le(chipset->suffix); + if (expected_apq == UINT32_C(0x00515041) /* "\0QPA" = reverse("APQ\0") */) { + /* + * Common bug: model name repeated twice, e.g. "APQ8016APQ8016" + * In this case, model matching code parses the second "APQ" as a suffix + */ + chipset->suffix[0] = 0; + chipset->suffix[1] = 0; + chipset->suffix[2] = 0; + } + break; + } + case cpuinfo_arm_chipset_series_samsung_exynos: + switch (chipset->model) { +#if CPUINFO_ARCH_ARM + case 4410: + /* Exynos 4410 was renamed to Exynos 4412 */ + chipset->model = 4412; + break; + case 5420: + /* Common bug: Exynos 5260 (Hexa-core) reported as Exynos 5420 (Quad-core) */ + switch (cores) { + case 4: + break; + case 6: + cpuinfo_log_info("reinterpreted Exynos 5420 chipset with 6 cores as Exynos 5260"); + chipset->model = 5260; + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core Exynos 5420 chipset", cores); + chipset->model = 0; + } + break; +#endif /* CPUINFO_ARCH_ARM */ + case 7580: + /* Common bug: Exynos 7578 (Quad-core) reported as Exynos 7580 (Octa-core) */ + switch (cores) { + case 4: + cpuinfo_log_info("reinterpreted Exynos 7580 chipset with 4 cores as Exynos 7578"); + chipset->model = 7578; + break; + case 8: + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core Exynos 7580 chipset", cores); + chipset->model = 0; + } + break; + } + break; + case cpuinfo_arm_chipset_series_mediatek_mt: + if (chipset->model == 6752) { + /* Common bug: MT6732 (Quad-core) reported as MT6752 (Octa-core) */ + switch (cores) { + case 4: + cpuinfo_log_info("reinterpreted MT6752 chipset with 4 cores as MT6732"); + chipset->model = 6732; + break; + case 8: + break; + default: + cpuinfo_log_warning("system reported invalid %"PRIu32"-core MT6752 chipset", cores); + chipset->model = 0; + } + } + if (chipset->suffix[0] == 'T') { + /* Normalization: "TURBO" and "TRUBO" (apparently a typo) -> "T" */ + const uint32_t suffix_word = load_u32le(chipset->suffix + 1); + switch (suffix_word) { + case UINT32_C(0x4F425255): /* "OBRU" = reverse("URBO") */ + case UINT32_C(0x4F425552): /* "OBUR" = reverse("RUBO") */ + if (chipset->suffix[5] == 0) { + chipset->suffix[1] = 0; + chipset->suffix[2] = 0; + chipset->suffix[3] = 0; + chipset->suffix[4] = 0; + } + break; + } + } + break; + case cpuinfo_arm_chipset_series_rockchip_rk: + if (chipset->model == 3288) { + /* Common bug: Rockchip RK3399 (Hexa-core) always reported as RK3288 (Quad-core) */ + switch (cores) { + case 4: + break; + case 6: + cpuinfo_log_info("reinterpreted RK3288 chipset with 6 cores as RK3399"); + chipset->model = 3399; + break; + default: + 
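
The suffix checks above compare four suffix bytes at a time by loading them as one little-endian 32-bit word, which is why the hexadecimal constants read as the reversed strings shown in the comments. A short sketch, with toy_load_u32le standing in for the library's load_u32le helper (assumed here to be a plain little-endian 4-byte load, as those reverse(...) comments indicate):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy little-endian 4-byte load, standing in for the load_u32le helper used above. */
static uint32_t toy_load_u32le(const char bytes[4]) {
	return (uint32_t) (uint8_t) bytes[0] |
	       ((uint32_t) (uint8_t) bytes[1] << 8) |
	       ((uint32_t) (uint8_t) bytes[2] << 16) |
	       ((uint32_t) (uint8_t) bytes[3] << 24);
}

int main(void) {
	/* Byte 0 lands in the low-order byte, so the constant reads "backwards":
	 * "MSM\0" -> 0x004D534D, "PRO\0" -> 0x004F5250, "APQ\0" -> 0x00515041. */
	printf("0x%08" PRIX32 "\n", toy_load_u32le("MSM")); /* 0x004D534D */
	printf("0x%08" PRIX32 "\n", toy_load_u32le("PRO")); /* 0x004F5250 */
	printf("0x%08" PRIX32 "\n", toy_load_u32le("APQ")); /* 0x00515041 */
	return 0;
}

Comparing one word is both cheaper and safer than strcmp here, because the suffix buffer is a fixed-size array that is not guaranteed to be NUL-terminated.
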
cpuinfo_log_warning("system reported invalid %"PRIu32"-core RK3288 chipset", cores); + chipset->model = 0; + } + } + break; + default: + break; + } +} + +/* Map from ARM chipset vendor ID to its string representation */ +static const char* chipset_vendor_string[cpuinfo_arm_chipset_vendor_max] = { + [cpuinfo_arm_chipset_vendor_unknown] = "Unknown", + [cpuinfo_arm_chipset_vendor_qualcomm] = "Qualcomm", + [cpuinfo_arm_chipset_vendor_mediatek] = "MediaTek", + [cpuinfo_arm_chipset_vendor_samsung] = "Samsung", + [cpuinfo_arm_chipset_vendor_hisilicon] = "HiSilicon", + [cpuinfo_arm_chipset_vendor_actions] = "Actions", + [cpuinfo_arm_chipset_vendor_allwinner] = "Allwinner", + [cpuinfo_arm_chipset_vendor_amlogic] = "Amlogic", + [cpuinfo_arm_chipset_vendor_broadcom] = "Broadcom", + [cpuinfo_arm_chipset_vendor_lg] = "LG", + [cpuinfo_arm_chipset_vendor_leadcore] = "Leadcore", + [cpuinfo_arm_chipset_vendor_marvell] = "Marvell", + [cpuinfo_arm_chipset_vendor_mstar] = "MStar", + [cpuinfo_arm_chipset_vendor_novathor] = "NovaThor", + [cpuinfo_arm_chipset_vendor_nvidia] = "Nvidia", + [cpuinfo_arm_chipset_vendor_pinecone] = "Pinecone", + [cpuinfo_arm_chipset_vendor_renesas] = "Renesas", + [cpuinfo_arm_chipset_vendor_rockchip] = "Rockchip", + [cpuinfo_arm_chipset_vendor_spreadtrum] = "Spreadtrum", + [cpuinfo_arm_chipset_vendor_telechips] = "Telechips", + [cpuinfo_arm_chipset_vendor_texas_instruments] = "Texas Instruments", + [cpuinfo_arm_chipset_vendor_wondermedia] = "WonderMedia", +}; + +/* Map from ARM chipset series ID to its string representation */ +static const char* chipset_series_string[cpuinfo_arm_chipset_series_max] = { + [cpuinfo_arm_chipset_series_unknown] = NULL, + [cpuinfo_arm_chipset_series_qualcomm_qsd] = "QSD", + [cpuinfo_arm_chipset_series_qualcomm_msm] = "MSM", + [cpuinfo_arm_chipset_series_qualcomm_apq] = "APQ", + [cpuinfo_arm_chipset_series_qualcomm_snapdragon] = "Snapdragon ", + [cpuinfo_arm_chipset_series_mediatek_mt] = "MT", + [cpuinfo_arm_chipset_series_samsung_exynos] = "Exynos ", + [cpuinfo_arm_chipset_series_hisilicon_k3v] = "K3V", + [cpuinfo_arm_chipset_series_hisilicon_hi] = "Hi", + [cpuinfo_arm_chipset_series_hisilicon_kirin] = "Kirin ", + [cpuinfo_arm_chipset_series_actions_atm] = "ATM", + [cpuinfo_arm_chipset_series_allwinner_a] = "A", + [cpuinfo_arm_chipset_series_amlogic_aml] = "AML", + [cpuinfo_arm_chipset_series_amlogic_s] = "S", + [cpuinfo_arm_chipset_series_broadcom_bcm] = "BCM", + [cpuinfo_arm_chipset_series_lg_nuclun] = "Nuclun ", + [cpuinfo_arm_chipset_series_leadcore_lc] = "LC", + [cpuinfo_arm_chipset_series_marvell_pxa] = "PXA", + [cpuinfo_arm_chipset_series_mstar_6a] = "6A", + [cpuinfo_arm_chipset_series_novathor_u] = "U", + [cpuinfo_arm_chipset_series_nvidia_tegra_t] = "Tegra T", + [cpuinfo_arm_chipset_series_nvidia_tegra_ap] = "Tegra AP", + [cpuinfo_arm_chipset_series_nvidia_tegra_sl] = "Tegra SL", + [cpuinfo_arm_chipset_series_pinecone_surge_s] = "Surge S", + [cpuinfo_arm_chipset_series_renesas_mp] = "MP", + [cpuinfo_arm_chipset_series_rockchip_rk] = "RK", + [cpuinfo_arm_chipset_series_spreadtrum_sc] = "SC", + [cpuinfo_arm_chipset_series_telechips_tcc] = "TCC", + [cpuinfo_arm_chipset_series_texas_instruments_omap] = "OMAP", + [cpuinfo_arm_chipset_series_wondermedia_wm] = "WM", +}; + +/* Convert chipset name represented by cpuinfo_arm_chipset structure to a string representation */ +void cpuinfo_arm_chipset_to_string( + const struct cpuinfo_arm_chipset chipset[restrict static 1], + char name[restrict static CPUINFO_ARM_CHIPSET_NAME_MAX]) +{ + enum 
cpuinfo_arm_chipset_vendor vendor = chipset->vendor; + if (vendor >= cpuinfo_arm_chipset_vendor_max) { + vendor = cpuinfo_arm_chipset_vendor_unknown; + } + enum cpuinfo_arm_chipset_series series = chipset->series; + if (series >= cpuinfo_arm_chipset_series_max) { + series = cpuinfo_arm_chipset_series_unknown; + } + const char* vendor_string = chipset_vendor_string[vendor]; + const char* series_string = chipset_series_string[series]; + const uint32_t model = chipset->model; + if (model == 0) { + if (series == cpuinfo_arm_chipset_series_unknown) { + strncpy(name, vendor_string, CPUINFO_ARM_CHIPSET_NAME_MAX); + } else { + snprintf(name, CPUINFO_ARM_CHIPSET_NAME_MAX, + "%s %s", vendor_string, series_string); + } + } else { + const size_t suffix_length = strnlen(chipset->suffix, CPUINFO_ARM_CHIPSET_SUFFIX_MAX); + snprintf(name, CPUINFO_ARM_CHIPSET_NAME_MAX, + "%s %s%"PRIu32"%.*s", vendor_string, series_string, model, (int) suffix_length, chipset->suffix); + } +} + +#ifdef __ANDROID__ + static inline struct cpuinfo_arm_chipset disambiguate_qualcomm_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_board_platform_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_chipname_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_hardware_chipname_chipset[restrict static 1]) + { + if (ro_hardware_chipname_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_hardware_chipname_chipset; + } + if (ro_chipname_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_chipname_chipset; + } + if (proc_cpuinfo_hardware_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *proc_cpuinfo_hardware_chipset; + } + if (ro_product_board_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_product_board_chipset; + } + return *ro_board_platform_chipset; + } + + static inline struct cpuinfo_arm_chipset disambiguate_mediatek_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_board_platform_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_mediatek_platform_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_chipname_chipset[restrict static 1]) + { + if (ro_chipname_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_chipname_chipset; + } + if (proc_cpuinfo_hardware_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *proc_cpuinfo_hardware_chipset; + } + if (ro_product_board_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_product_board_chipset; + } + if (ro_board_platform_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_board_platform_chipset; + } + return *ro_mediatek_platform_chipset; + } + + static inline struct cpuinfo_arm_chipset disambiguate_hisilicon_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_board_platform_chipset[restrict static 1]) + { + if (proc_cpuinfo_hardware_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *proc_cpuinfo_hardware_chipset; + } + if (ro_product_board_chipset->series != cpuinfo_arm_chipset_series_unknown) { 
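
cpuinfo_arm_chipset_to_string above prints the suffix with a "%.*s" precision taken from strnlen because the fixed-size suffix array need not be NUL-terminated when the suffix uses every byte. A self-contained re-creation of the same formatting under that assumption; the TOY_* sizes and sample values are illustrative, the real limits being CPUINFO_ARM_CHIPSET_SUFFIX_MAX and CPUINFO_ARM_CHIPSET_NAME_MAX:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_SUFFIX_MAX 8
#define TOY_NAME_MAX   48

int main(void) {
	const char vendor_string[] = "Qualcomm";
	const char series_string[] = "MSM";
	const uint32_t model = 8996;
	/* The suffix array may use all of its bytes and then has no terminating NUL;
	 * bounding the read with strnlen and printing with "%.*s" is safe either way. */
	const char suffix[TOY_SUFFIX_MAX] = {'P', 'R', 'O', '-', 'A', 'B'};

	char name[TOY_NAME_MAX];
	const size_t suffix_length = strnlen(suffix, TOY_SUFFIX_MAX);
	snprintf(name, TOY_NAME_MAX, "%s %s%" PRIu32 "%.*s",
		vendor_string, series_string, model, (int) suffix_length, suffix);
	printf("%s\n", name); /* prints: Qualcomm MSM8996PRO-AB */
	return 0;
}
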
+ return *ro_product_board_chipset; + } + return *ro_board_platform_chipset; + } + + static inline struct cpuinfo_arm_chipset disambiguate_amlogic_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_board_platform_chipset[restrict static 1]) + { + if (proc_cpuinfo_hardware_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *proc_cpuinfo_hardware_chipset; + } + return *ro_board_platform_chipset; + } + + static inline struct cpuinfo_arm_chipset disambiguate_marvell_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_chipname_chipset[restrict static 1]) + { + if (ro_chipname_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_chipname_chipset; + } + if (ro_product_board_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_product_board_chipset; + } + return *proc_cpuinfo_hardware_chipset; + } + + static inline struct cpuinfo_arm_chipset disambiguate_rockchip_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_board_platform_chipset[restrict static 1]) + { + if (ro_product_board_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_product_board_chipset; + } + if (proc_cpuinfo_hardware_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *proc_cpuinfo_hardware_chipset; + } + return *ro_board_platform_chipset; + } + + static inline struct cpuinfo_arm_chipset disambiguate_spreadtrum_chipset( + const struct cpuinfo_arm_chipset proc_cpuinfo_hardware_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_product_board_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_board_platform_chipset[restrict static 1], + const struct cpuinfo_arm_chipset ro_chipname_chipset[restrict static 1]) + { + if (ro_chipname_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_chipname_chipset; + } + if (ro_product_board_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *ro_product_board_chipset; + } + if (proc_cpuinfo_hardware_chipset->series != cpuinfo_arm_chipset_series_unknown) { + return *proc_cpuinfo_hardware_chipset; + } + return *ro_board_platform_chipset; + } + + /* + * Decodes chipset name from Android system properties: + * - /proc/cpuinfo Hardware string + * - ro.product.board + * - ro.board.platform + * - ro.mediatek.platform + * - ro.chipname + * For some chipsets, the function relies frequency and on number of cores for chipset detection. + * + * @param[in] properties - structure with the Android system properties described above. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. 
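
Every disambiguate_* helper above and below encodes the same rule: try the property sources in a vendor-specific priority order, return the first one whose series is known, and fall through to the last source unconditionally. The real code spells each order out with explicit parameters; the following is only a generic sketch of that rule with toy types:

#include <stddef.h>
#include <stdio.h>

/* Toy stand-ins for the chipset structure and the "unknown" series marker. */
enum toy_series { toy_series_unknown = 0, toy_series_msm, toy_series_apq };
struct toy_chipset { enum toy_series series; unsigned model; };

/* Return the first candidate whose series is known; if none is known,
 * return the last candidate (mirroring the unconditional fallthrough above). */
static struct toy_chipset toy_disambiguate(const struct toy_chipset* candidates, size_t count) {
	for (size_t i = 0; i + 1 < count; i++) {
		if (candidates[i].series != toy_series_unknown) {
			return candidates[i];
		}
	}
	return candidates[count - 1];
}

int main(void) {
	const struct toy_chipset in_priority_order[] = {
		{ toy_series_unknown, 0 }, /* e.g. highest-priority property: not set */
		{ toy_series_msm, 8996 },  /* e.g. next property: known, wins */
		{ toy_series_apq, 8064 },  /* e.g. lowest-priority property: ignored */
	};
	const struct toy_chipset result = toy_disambiguate(in_priority_order, 3);
	printf("series %d model %u\n", (int) result.series, result.model); /* series 1 model 8996 */
	return 0;
}
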
+ */ + struct cpuinfo_arm_chipset cpuinfo_arm_android_decode_chipset( + const struct cpuinfo_android_properties properties[restrict static 1], + uint32_t cores, + uint32_t max_cpu_freq_max) + { + struct cpuinfo_arm_chipset chipset = { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = cpuinfo_arm_chipset_series_unknown, + }; + + const bool tegra_platform = is_tegra( + properties->ro_board_platform, + properties->ro_board_platform + strnlen(properties->ro_board_platform, CPUINFO_BUILD_PROP_VALUE_MAX)); + + struct cpuinfo_arm_chipset chipsets[cpuinfo_android_chipset_property_max] = { + [cpuinfo_android_chipset_property_proc_cpuinfo_hardware] = + cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware( + properties->proc_cpuinfo_hardware, cores, max_cpu_freq_max, tegra_platform), + [cpuinfo_android_chipset_property_ro_product_board] = + cpuinfo_arm_android_decode_chipset_from_ro_product_board( + properties->ro_product_board, cores, max_cpu_freq_max), + [cpuinfo_android_chipset_property_ro_board_platform] = + cpuinfo_arm_android_decode_chipset_from_ro_board_platform( + properties->ro_board_platform, cores, max_cpu_freq_max), + [cpuinfo_android_chipset_property_ro_mediatek_platform] = + cpuinfo_arm_android_decode_chipset_from_ro_mediatek_platform(properties->ro_mediatek_platform), + [cpuinfo_android_chipset_property_ro_arch] = + cpuinfo_arm_android_decode_chipset_from_ro_arch(properties->ro_arch), + [cpuinfo_android_chipset_property_ro_chipname] = + cpuinfo_arm_android_decode_chipset_from_ro_chipname(properties->ro_chipname), + [cpuinfo_android_chipset_property_ro_hardware_chipname] = + cpuinfo_arm_android_decode_chipset_from_ro_chipname(properties->ro_hardware_chipname), + }; + enum cpuinfo_arm_chipset_vendor vendor = cpuinfo_arm_chipset_vendor_unknown; + for (size_t i = 0; i < cpuinfo_android_chipset_property_max; i++) { + const enum cpuinfo_arm_chipset_vendor decoded_vendor = chipsets[i].vendor; + if (decoded_vendor != cpuinfo_arm_chipset_vendor_unknown) { + if (vendor == cpuinfo_arm_chipset_vendor_unknown) { + vendor = decoded_vendor; + } else if (vendor != decoded_vendor) { + /* Parsing different system properties produces different chipset vendors. This situation is rare. */ + cpuinfo_log_error( + "chipset detection failed: different chipset vendors reported in different system properties"); + goto finish; + } + } + } + if (vendor == cpuinfo_arm_chipset_vendor_unknown) { + cpuinfo_log_warning( + "chipset detection failed: none of the system properties matched known signatures"); + goto finish; + } + + /* Fix common bugs in reported chipsets */ + for (size_t i = 0; i < cpuinfo_android_chipset_property_max; i++) { + cpuinfo_arm_fixup_chipset(&chipsets[i], cores, max_cpu_freq_max); + } + + /* + * Propagate suffixes: consider all pairs of chipsets, if both chipsets in the pair are from the same series, + * and one's suffix is a prefix of another's chipset suffix, use the longest suffix. + */ + for (size_t i = 0; i < cpuinfo_android_chipset_property_max; i++) { + const size_t chipset_i_suffix_length = strnlen(chipsets[i].suffix, CPUINFO_ARM_CHIPSET_SUFFIX_MAX); + for (size_t j = 0; j < i; j++) { + if (chipsets[i].series == chipsets[j].series) { + const size_t chipset_j_suffix_length = strnlen(chipsets[j].suffix, CPUINFO_ARM_CHIPSET_SUFFIX_MAX); + if (chipset_i_suffix_length != chipset_j_suffix_length) { + const size_t common_prefix_length = (chipset_i_suffix_length < chipset_j_suffix_length) ? 
+ chipset_i_suffix_length : chipset_j_suffix_length; + if (common_prefix_length == 0 || + memcmp(chipsets[i].suffix, chipsets[j].suffix, common_prefix_length) == 0) + { + if (chipset_i_suffix_length > chipset_j_suffix_length) { + memcpy(chipsets[j].suffix, chipsets[i].suffix, chipset_i_suffix_length); + } else { + memcpy(chipsets[i].suffix, chipsets[j].suffix, chipset_j_suffix_length); + } + } + } + } + } + } + + for (size_t i = 0; i < cpuinfo_android_chipset_property_max; i++) { + if (chipsets[i].series != cpuinfo_arm_chipset_series_unknown) { + if (chipset.series == cpuinfo_arm_chipset_series_unknown) { + chipset = chipsets[i]; + } else if (chipsets[i].series != chipset.series || chipsets[i].model != chipset.model || + strncmp(chipsets[i].suffix, chipset.suffix, CPUINFO_ARM_CHIPSET_SUFFIX_MAX) != 0) + { + cpuinfo_log_info( + "different chipsets reported in different system properties; " + "vendor-specific disambiguation heuristic would be used"); + switch (vendor) { + case cpuinfo_arm_chipset_vendor_qualcomm: + return disambiguate_qualcomm_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_product_board], + &chipsets[cpuinfo_android_chipset_property_ro_board_platform], + &chipsets[cpuinfo_android_chipset_property_ro_chipname], + &chipsets[cpuinfo_android_chipset_property_ro_hardware_chipname]); + case cpuinfo_arm_chipset_vendor_mediatek: + return disambiguate_mediatek_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_product_board], + &chipsets[cpuinfo_android_chipset_property_ro_board_platform], + &chipsets[cpuinfo_android_chipset_property_ro_mediatek_platform], + &chipsets[cpuinfo_android_chipset_property_ro_chipname]); + case cpuinfo_arm_chipset_vendor_hisilicon: + return disambiguate_hisilicon_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_product_board], + &chipsets[cpuinfo_android_chipset_property_ro_board_platform]); + case cpuinfo_arm_chipset_vendor_amlogic: + return disambiguate_amlogic_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_board_platform]); + case cpuinfo_arm_chipset_vendor_marvell: + return disambiguate_marvell_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_product_board], + &chipsets[cpuinfo_android_chipset_property_ro_chipname]); + case cpuinfo_arm_chipset_vendor_rockchip: + return disambiguate_rockchip_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_product_board], + &chipsets[cpuinfo_android_chipset_property_ro_board_platform]); + case cpuinfo_arm_chipset_vendor_spreadtrum: + return disambiguate_spreadtrum_chipset( + &chipsets[cpuinfo_android_chipset_property_proc_cpuinfo_hardware], + &chipsets[cpuinfo_android_chipset_property_ro_product_board], + &chipsets[cpuinfo_android_chipset_property_ro_board_platform], + &chipsets[cpuinfo_android_chipset_property_ro_chipname]); + default: + cpuinfo_log_error( + "chipset detection failed: " + "could not disambiguate different chipsets reported in different system properties"); + /* chipset variable contains valid, but inconsistent chipset information, overwrite it */ + chipset = (struct cpuinfo_arm_chipset) { + .vendor = cpuinfo_arm_chipset_vendor_unknown, + .series = 
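
The propagation loop above unifies suffixes reported by different properties of the same series when one suffix is a prefix of the other, copying the longer suffix over the shorter one. A reduced sketch of the prefix test and copy for a single pair of suffix buffers; names and sizes are illustrative:

#include <stdio.h>
#include <string.h>

#define TOY_SUFFIX_MAX 8

/* If one suffix is a prefix of the other, copy the longer suffix over the shorter one. */
static void toy_propagate_suffix(char a[TOY_SUFFIX_MAX], char b[TOY_SUFFIX_MAX]) {
	const size_t a_length = strnlen(a, TOY_SUFFIX_MAX);
	const size_t b_length = strnlen(b, TOY_SUFFIX_MAX);
	if (a_length == b_length) {
		return; /* equal length: either identical or genuinely conflicting; leave unchanged */
	}
	const size_t common = (a_length < b_length) ? a_length : b_length;
	if (common == 0 || memcmp(a, b, common) == 0) {
		if (a_length > b_length) {
			memcpy(b, a, a_length);
		} else {
			memcpy(a, b, b_length);
		}
	}
}

int main(void) {
	char from_board[TOY_SUFFIX_MAX] = "P";      /* e.g. truncated suffix from one property */
	char from_chipname[TOY_SUFFIX_MAX] = "PRO"; /* e.g. full suffix from another property */
	toy_propagate_suffix(from_board, from_chipname);
	printf("%.8s / %.8s\n", from_board, from_chipname); /* prints: PRO / PRO */
	return 0;
}
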
cpuinfo_arm_chipset_series_unknown, + }; + goto finish; + } + } + } + } + + finish: + return chipset; + } +#else /* !defined(__ANDROID__) */ + /* + * Fix commonly misreported Broadcom BCM models on Raspberry Pi boards. + * + * @param[in,out] chipset - chipset name to fix. + * @param[in] revision - /proc/cpuinfo Revision string. + */ + void cpuinfo_arm_fixup_raspberry_pi_chipset( + struct cpuinfo_arm_chipset chipset[restrict static 1], + const char revision[restrict static CPUINFO_HARDWARE_VALUE_MAX]) + { + const size_t revision_length = strnlen(revision, CPUINFO_REVISION_VALUE_MAX); + + /* Parse revision codes according to https://www.raspberrypi.org/documentation/hardware/raspberrypi/revision-codes/README.md */ + #if CPUINFO_ARCH_ARM + if (revision_length == 4) { + /* + * Old-style revision codes. + * All Raspberry Pi models with old-style revision code use Broadcom BCM2835. + */ + + /* BCM2835 often misreported as BCM2708 */ + if (chipset->model == 2708) { + chipset->model = 2835; + } + return; + } + #endif + if ((size_t) (revision_length - 5) <= (size_t) (8 - 5) /* 5 <= length(revision) <= 8 */) { + /* New-style revision codes */ + + uint32_t model = 0; + switch (revision[revision_length - 4]) { + case '0': + /* BCM2835 */ + model = 2835; + break; + case '1': + /* BCM2836 */ + model = 2836; + break; + case '2': + /* BCM2837 */ + model = 2837; + break; + case '3': + /* BCM2711 */ + model = 2711; + break; + } + + if (model != 0) { + chipset->model = model; + chipset->suffix[0] = 0; + } + } + } + + /* + * Decodes chipset name from /proc/cpuinfo Hardware string. + * For some chipsets, the function relies frequency and on number of cores for chipset detection. + * + * @param[in] hardware - /proc/cpuinfo Hardware string. + * @param cores - number of cores in the chipset. + * @param max_cpu_freq_max - maximum of /sys/devices/system/cpu/cpu/cpofreq/cpu_freq_max values. + * + * @returns Decoded chipset name. If chipset could not be decoded, the resulting structure would use `unknown` vendor + * and series identifiers. 
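
The new-style Raspberry Pi handling above keys the chipset off one hexadecimal digit at a fixed offset from the end of the revision string (the processor field of the new-style code). A worked sketch using a couple of commonly published revision codes ("a02082" for a BCM2837-based Pi 3 Model B, "c03111" for a BCM2711-based Pi 4 Model B); toy_rpi_model is an illustrative name:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Map the "processor" digit of a new-style Raspberry Pi revision code to a BCM model,
 * mirroring the switch above. Returns 0 for old-style (4-character) codes or unknown digits. */
static uint32_t toy_rpi_model(const char* revision) {
	const size_t length = strlen(revision);
	if (length < 5 || length > 8) {
		return 0; /* old-style codes are handled separately (always BCM2835) */
	}
	switch (revision[length - 4]) {
		case '0': return 2835;
		case '1': return 2836;
		case '2': return 2837;
		case '3': return 2711;
		default:  return 0;
	}
}

int main(void) {
	printf("%" PRIu32 "\n", toy_rpi_model("a02082")); /* 2837: Pi 3 Model B */
	printf("%" PRIu32 "\n", toy_rpi_model("c03111")); /* 2711: Pi 4 Model B */
	printf("%" PRIu32 "\n", toy_rpi_model("000e"));   /* 0: old-style code */
	return 0;
}
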
+ */ + struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset( + const char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + const char revision[restrict static CPUINFO_REVISION_VALUE_MAX], + uint32_t cores, + uint32_t max_cpu_freq_max) + { + struct cpuinfo_arm_chipset chipset = + cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_hardware( + hardware, cores, max_cpu_freq_max, false); + if (chipset.vendor == cpuinfo_arm_chipset_vendor_unknown) { + cpuinfo_log_warning( + "chipset detection failed: /proc/cpuinfo Hardware string did not match known signatures"); + } else if (chipset.vendor == cpuinfo_arm_chipset_vendor_broadcom) { + /* Raspberry Pi kernel reports bogus chipset models; detect chipset from RPi revision */ + cpuinfo_arm_fixup_raspberry_pi_chipset(&chipset, revision); + } else { + cpuinfo_arm_fixup_chipset(&chipset, cores, max_cpu_freq_max); + } + return chipset; + } + +#endif diff --git a/dep/cpuinfo/src/arm/linux/clusters.c b/dep/cpuinfo/src/arm/linux/clusters.c new file mode 100644 index 000000000..c7a40457d --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/clusters.c @@ -0,0 +1,493 @@ +#include +#include +#include +#include + +#include +#include +#if defined(__ANDROID__) + #include +#endif +#include +#include +#include +#include +#include + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + +/* + * Assigns logical processors to clusters of cores using heuristic based on the typical configuration of clusters for + * 5, 6, 8, and 10 cores: + * - 5 cores (ARM32 Android only): 2 clusters of 4+1 cores + * - 6 cores: 2 clusters of 4+2 cores + * - 8 cores: 2 clusters of 4+4 cores + * - 10 cores: 3 clusters of 4+4+2 cores + * + * The function must be called after parsing OS-provided information on core clusters. + * Its purpose is to detect clusters of cores when OS-provided information is lacking or incomplete, i.e. + * - Linux kernel is not configured to report information in sysfs topology leaf. + * - Linux kernel reports topology information only for online cores, and only cores on one cluster are online, e.g.: + * - Exynos 8890 has 8 cores in 4+4 clusters, but only the first cluster of 4 cores is reported, and cluster + * configuration of logical processors 4-7 is not reported (all remaining processors 4-7 form cluster 1) + * - MT6797 has 10 cores in 4+4+2, but only the first cluster of 4 cores is reported, and cluster configuration + * of logical processors 4-9 is not reported (processors 4-7 form cluster 1, and processors 8-9 form cluster 2). + * + * Heuristic assignment of processors to the above pre-defined clusters fails if such assignment would contradict + * information provided by the operating system: + * - Any of the OS-reported processor clusters is different than the corresponding heuristic cluster. + * - Processors in a heuristic cluster have no OS-provided cluster siblings information, but have known and different + * minimum/maximum frequency. + * - Processors in a heuristic cluster have no OS-provided cluster siblings information, but have known and different + * MIDR components. + * + * If the heuristic assignment of processors to clusters of cores fails, all processors' clusters are unchanged. + * + * @param usable_processors - number of processors in the @p processors array with CPUINFO_LINUX_FLAG_VALID flags. + * @param max_processors - number of elements in the @p processors array. 
+ * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum + * frequency, MIDR infromation, and core cluster (package siblings list) information. + * + * @retval true if the heuristic successfully assigned all processors into clusters of cores. + * @retval false if known details about processors contradict the heuristic configuration of core clusters. + */ +bool cpuinfo_arm_linux_detect_core_clusters_by_heuristic( + uint32_t usable_processors, + uint32_t max_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]) +{ + uint32_t cluster_processors[3]; + switch (usable_processors) { + case 10: + cluster_processors[0] = 4; + cluster_processors[1] = 4; + cluster_processors[2] = 2; + break; + case 8: + cluster_processors[0] = 4; + cluster_processors[1] = 4; + break; + case 6: + cluster_processors[0] = 4; + cluster_processors[1] = 2; + break; +#if defined(__ANDROID__) && CPUINFO_ARCH_ARM + case 5: + /* + * The only processor with 5 cores is Leadcore L1860C (ARMv7, mobile), + * but this configuration is not too unreasonable for a virtualized ARM server. + */ + cluster_processors[0] = 4; + cluster_processors[1] = 1; + break; +#endif + default: + return false; + } + + /* + * Assignment of processors to core clusters is done in two passes: + * 1. Verify that the clusters proposed by heuristic are compatible with known details about processors. + * 2. If verification passed, update core clusters for the processors. + */ + + uint32_t cluster = 0; + uint32_t expected_cluster_processors = 0; + uint32_t cluster_start, cluster_flags, cluster_midr, cluster_max_frequency, cluster_min_frequency; + bool expected_cluster_exists; + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (expected_cluster_processors == 0) { + /* Expect this processor to start a new cluster */ + + expected_cluster_exists = !!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER); + if (expected_cluster_exists) { + if (processors[i].package_leader_id != i) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "processor %"PRIu32" is expected to start a new cluster #%"PRIu32" with %"PRIu32" cores, " + "but system siblings lists reported it as a sibling of processor %"PRIu32, + i, cluster, cluster_processors[cluster], processors[i].package_leader_id); + return false; + } + } else { + cluster_flags = 0; + } + + cluster_start = i; + expected_cluster_processors = cluster_processors[cluster++]; + } else { + /* Expect this processor to belong to the same cluster as processor */ + + if (expected_cluster_exists) { + /* + * The cluster suggested by the heuristic was already parsed from system siblings lists. + * For all processors we expect in the cluster, check that: + * - They have pre-assigned cluster from siblings lists (CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER flag). + * - They were assigned to the same cluster based on siblings lists + * (package_leader_id points to the first processor in the cluster). 
+ */ + + if ((processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) == 0) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "processor %"PRIu32" is expected to belong to the cluster of processor %"PRIu32", " + "but system siblings lists did not report it as a sibling of processor %"PRIu32, + i, cluster_start, cluster_start); + return false; + } + if (processors[i].package_leader_id != cluster_start) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "processor %"PRIu32" is expected to belong to the cluster of processor %"PRIu32", " + "but system siblings lists reported it to belong to the cluster of processor %"PRIu32, + i, cluster_start, cluster_start); + return false; + } + } else { + /* + * The cluster suggest by the heuristic was not parsed from system siblings lists. + * For all processors we expect in the cluster, check that: + * - They have no pre-assigned cluster from siblings lists. + * - If their min/max CPU frequency is known, it is the same. + * - If any part of their MIDR (Implementer, Variant, Part, Revision) is known, it is the same. + */ + + if (processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "processor %"PRIu32" is expected to be unassigned to any cluster, " + "but system siblings lists reported it to belong to the cluster of processor %"PRIu32, + i, processors[i].package_leader_id); + return false; + } + + if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (cluster_min_frequency != processors[i].min_frequency) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of its expected cluster (%"PRIu32" KHz)", + i, processors[i].min_frequency, cluster_min_frequency); + return false; + } + } else { + cluster_min_frequency = processors[i].min_frequency; + cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY; + } + } + + if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (cluster_max_frequency != processors[i].max_frequency) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of its expected cluster (%"PRIu32" KHz)", + i, processors[i].max_frequency, cluster_max_frequency); + return false; + } + } else { + cluster_max_frequency = processors[i].max_frequency; + cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { + if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of its expected cluster (0x%02"PRIx32")", + i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr)); + return false; + } + } else { + cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) { + if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) != (processors[i].midr & 
CPUINFO_ARM_MIDR_VARIANT_MASK)) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "CPU Variant of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")", + i, midr_get_variant(processors[i].midr), midr_get_variant(cluster_midr)); + return false; + } + } else { + cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) { + if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "CPU Part of processor %"PRIu32" (0x%03"PRIx32") is different than of its expected cluster (0x%03"PRIx32")", + i, midr_get_part(processors[i].midr), midr_get_part(cluster_midr)); + return false; + } + } else { + cluster_midr = midr_copy_part(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) { + if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) { + cpuinfo_log_debug( + "heuristic detection of core clusters failed: " + "CPU Revision of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")", + i, midr_get_revision(cluster_midr), midr_get_revision(processors[i].midr)); + return false; + } + } else { + cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION; + } + } + } + } + expected_cluster_processors--; + } + } + + /* Verification passed, assign all processors to new clusters */ + cluster = 0; + expected_cluster_processors = 0; + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (expected_cluster_processors == 0) { + /* Expect this processor to start a new cluster */ + + cluster_start = i; + expected_cluster_processors = cluster_processors[cluster++]; + } else { + /* Expect this processor to belong to the same cluster as processor */ + + if (!(processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) { + cpuinfo_log_debug("assigned processor %"PRIu32" to cluster of processor %"PRIu32" based on heuristic", + i, cluster_start); + } + + processors[i].package_leader_id = cluster_start; + processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + } + expected_cluster_processors--; + } + } + return true; +} + +/* + * Assigns logical processors to clusters of cores in sequential manner: + * - Clusters detected from OS-provided information are unchanged: + * - Processors assigned to these clusters stay assigned to the same clusters + * - No new processors are added to these clusters + * - Processors without pre-assigned cluster are clustered in one sequential scan: + * - If known details (min/max frequency, MIDR components) of a processor are compatible with a preceeding + * processor, without pre-assigned cluster, the processor is assigned to the cluster of the preceeding processor. + * - If known details (min/max frequency, MIDR components) of a processor are not compatible with a preceeding + * processor, the processor is assigned to a newly created cluster. 
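
The cluster checks compare individual MIDR fields through masks and accumulate them into a per-cluster MIDR with the midr_copy_* helpers. The MIDR field layout is architectural: implementer in bits 31:24, variant in 23:20, architecture in 19:16, part number in 15:4, revision in 3:0. Below is a self-contained sketch with stand-in mask values and a copy helper that overwrites only its own field, which is how the helpers are used here; the real constants and helper names come from the library's MIDR header:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Architectural MIDR field masks (stand-ins for the CPUINFO_ARM_MIDR_*_MASK constants). */
#define TOY_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000)
#define TOY_MIDR_VARIANT_MASK     UINT32_C(0x00F00000)
#define TOY_MIDR_PART_MASK        UINT32_C(0x0000FFF0)
#define TOY_MIDR_REVISION_MASK    UINT32_C(0x0000000F)

static uint32_t toy_midr_get_implementer(uint32_t midr) { return (midr & TOY_MIDR_IMPLEMENTER_MASK) >> 24; }
static uint32_t toy_midr_get_part(uint32_t midr)        { return (midr & TOY_MIDR_PART_MASK) >> 4; }

/* Overwrite only the part-number field of accumulated_midr with the one from new_midr,
 * the way the midr_copy_* helpers are used above to build up a cluster MIDR field by field. */
static uint32_t toy_midr_copy_part(uint32_t accumulated_midr, uint32_t new_midr) {
	return (accumulated_midr & ~TOY_MIDR_PART_MASK) | (new_midr & TOY_MIDR_PART_MASK);
}

int main(void) {
	const uint32_t midr = UINT32_C(0x410FD034); /* a Cortex-A53 r0p4 MIDR value */
	printf("implementer 0x%02" PRIX32 " part 0x%03" PRIX32 "\n",
		toy_midr_get_implementer(midr), toy_midr_get_part(midr)); /* implementer 0x41 part 0xD03 */

	uint32_t cluster_midr = 0;
	cluster_midr = toy_midr_copy_part(cluster_midr, midr);
	printf("cluster midr 0x%08" PRIX32 "\n", cluster_midr); /* cluster midr 0x0000D030 */
	return 0;
}
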
+ * + * The function must be called after parsing OS-provided information on core clusters, and usually is called only + * if heuristic assignment of processors to clusters (cpuinfo_arm_linux_cluster_processors_by_heuristic) failed. + * + * Its purpose is to detect clusters of cores when OS-provided information is lacking or incomplete, i.e. + * - Linux kernel is not configured to report information in sysfs topology leaf. + * - Linux kernel reports topology information only for online cores, and all cores on some of the clusters are offline. + * + * Sequential assignment of processors to clusters always succeeds, and upon exit, all usable processors in the + * @p processors array have cluster information. + * + * @param max_processors - number of elements in the @p processors array. + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, minimum/maximum + * frequency, MIDR infromation, and core cluster (package siblings list) information. + * + * @retval true if the heuristic successfully assigned all processors into clusters of cores. + * @retval false if known details about processors contradict the heuristic configuration of core clusters. + */ +void cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan( + uint32_t max_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]) +{ + uint32_t cluster_flags = 0; + uint32_t cluster_processors = 0; + uint32_t cluster_start, cluster_midr, cluster_max_frequency, cluster_min_frequency; + for (uint32_t i = 0; i < max_processors; i++) { + if ((processors[i].flags & (CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) == CPUINFO_LINUX_FLAG_VALID) { + if (cluster_processors == 0) { + goto new_cluster; + } + + if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (cluster_flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + if (cluster_min_frequency != processors[i].min_frequency) { + cpuinfo_log_info( + "minimum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); " + "processor %"PRIu32" starts to a new cluster", + i, processors[i].min_frequency, cluster_min_frequency, i); + goto new_cluster; + } + } else { + cluster_min_frequency = processors[i].min_frequency; + cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY; + } + } + + if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (cluster_flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + if (cluster_max_frequency != processors[i].max_frequency) { + cpuinfo_log_debug( + "maximum frequency of processor %"PRIu32" (%"PRIu32" KHz) is different than of preceeding cluster (%"PRIu32" KHz); " + "processor %"PRIu32" starts a new cluster", + i, processors[i].max_frequency, cluster_max_frequency, i); + goto new_cluster; + } + } else { + cluster_max_frequency = processors[i].max_frequency; + cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { + if ((cluster_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK)) { + cpuinfo_log_debug( + "CPU Implementer of processor %"PRIu32" (0x%02"PRIx32") is different than of preceeding cluster (0x%02"PRIx32"); " + "processor %"PRIu32" starts to a new cluster", + i, midr_get_implementer(processors[i].midr), midr_get_implementer(cluster_midr), i); + goto new_cluster; + } + } else { + cluster_midr = midr_copy_implementer(cluster_midr, 
processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_VARIANT) { + if ((cluster_midr & CPUINFO_ARM_MIDR_VARIANT_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_VARIANT_MASK)) { + cpuinfo_log_debug( + "CPU Variant of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")" + "processor %"PRIu32" starts to a new cluster", + i, midr_get_variant(processors[i].midr), midr_get_variant(cluster_midr), i); + goto new_cluster; + } + } else { + cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_PART) { + if ((cluster_midr & CPUINFO_ARM_MIDR_PART_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_PART_MASK)) { + cpuinfo_log_debug( + "CPU Part of processor %"PRIu32" (0x%03"PRIx32") is different than of its expected cluster (0x%03"PRIx32")" + "processor %"PRIu32" starts to a new cluster", + i, midr_get_part(processors[i].midr), midr_get_part(cluster_midr), i); + goto new_cluster; + } + } else { + cluster_midr = midr_copy_part(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART; + } + } + + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) { + if (cluster_flags & CPUINFO_ARM_LINUX_VALID_REVISION) { + if ((cluster_midr & CPUINFO_ARM_MIDR_REVISION_MASK) != (processors[i].midr & CPUINFO_ARM_MIDR_REVISION_MASK)) { + cpuinfo_log_debug( + "CPU Revision of processor %"PRIu32" (0x%"PRIx32") is different than of its expected cluster (0x%"PRIx32")" + "processor %"PRIu32" starts to a new cluster", + i, midr_get_revision(cluster_midr), midr_get_revision(processors[i].midr), i); + goto new_cluster; + } + } else { + cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION; + } + } + + /* All checks passed, attach processor to the preceeding cluster */ + cluster_processors++; + processors[i].package_leader_id = cluster_start; + processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + cpuinfo_log_debug("assigned processor %"PRIu32" to preceeding cluster of processor %"PRIu32, i, cluster_start); + continue; + +new_cluster: + /* Create a new cluster starting with processor i */ + cluster_start = i; + processors[i].package_leader_id = i; + processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + cluster_processors = 1; + + /* Copy known information from processor to cluster, and set the flags accordingly */ + cluster_flags = 0; + if (processors[i].flags & CPUINFO_LINUX_FLAG_MIN_FREQUENCY) { + cluster_min_frequency = processors[i].min_frequency; + cluster_flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY; + } + if (processors[i].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) { + cluster_max_frequency = processors[i].max_frequency; + cluster_flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY; + } + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { + cluster_midr = midr_copy_implementer(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER; + } + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) { + cluster_midr = midr_copy_variant(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_VARIANT; + } + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PART) { + cluster_midr = midr_copy_part(cluster_midr, 
processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_PART; + } + if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_REVISION) { + cluster_midr = midr_copy_revision(cluster_midr, processors[i].midr); + cluster_flags |= CPUINFO_ARM_LINUX_VALID_REVISION; + } + } + } +} + +/* + * Counts the number of logical processors in each core cluster. + * This function should be called after all processors are assigned to core clusters. + * + * @param max_processors - number of elements in the @p processors array. + * @param[in,out] processors - processor descriptors with pre-parsed POSSIBLE and PRESENT flags, + * and decoded core cluster (package_leader_id) information. + * The function expects the value of processors[i].package_processor_count to be zero. + * Upon return, processors[i].package_processor_count will contain the number of logical + * processors in the respective core cluster. + */ +void cpuinfo_arm_linux_count_cluster_processors( + uint32_t max_processors, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors]) +{ + /* First pass: accumulate the number of processors at the group leader's package_processor_count */ + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t package_leader_id = processors[i].package_leader_id; + processors[package_leader_id].package_processor_count += 1; + } + } + /* Second pass: copy the package_processor_count from the group leader processor */ + for (uint32_t i = 0; i < max_processors; i++) { + if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t package_leader_id = processors[i].package_leader_id; + processors[i].package_processor_count = processors[package_leader_id].package_processor_count; + } + } +} diff --git a/dep/cpuinfo/src/arm/linux/cp.h b/dep/cpuinfo/src/arm/linux/cp.h new file mode 100644 index 000000000..63940ec5d --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/cp.h @@ -0,0 +1,44 @@ +#include + + +#if CPUINFO_MOCK + extern uint32_t cpuinfo_arm_fpsid; + extern uint32_t cpuinfo_arm_mvfr0; + extern uint32_t cpuinfo_arm_wcid; + + static inline uint32_t read_fpsid(void) { + return cpuinfo_arm_fpsid; + } + + static inline uint32_t read_mvfr0(void) { + return cpuinfo_arm_mvfr0; + } + + static inline uint32_t read_wcid(void) { + return cpuinfo_arm_wcid; + } +#else + #if !defined(__ARM_ARCH_7A__) && !defined(__ARM_ARCH_8A__) && !(defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) + /* + * CoProcessor 10 is inaccessible from user mode since ARMv7, + * and clang refuses to compile inline assembly when targeting ARMv7+ + */ + static inline uint32_t read_fpsid(void) { + uint32_t fpsid; + __asm__ __volatile__("MRC p10, 0x7, %[fpsid], cr0, cr0, 0" : [fpsid] "=r" (fpsid)); + return fpsid; + } + + static inline uint32_t read_mvfr0(void) { + uint32_t mvfr0; + __asm__ __volatile__("MRC p10, 0x7, %[mvfr0], cr7, cr0, 0" : [mvfr0] "=r" (mvfr0)); + return mvfr0; + } + #endif + + static inline uint32_t read_wcid(void) { + uint32_t wcid; + __asm__ __volatile__("MRC p1, 0, %[wcid], c0, c0" : [wcid] "=r" (wcid)); + return wcid; + } +#endif diff --git a/dep/cpuinfo/src/arm/linux/cpuinfo.c b/dep/cpuinfo/src/arm/linux/cpuinfo.c new file mode 100644 index 000000000..90e1631ee --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/cpuinfo.c @@ -0,0 +1,908 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Size, in chars, of the on-stack buffer used for parsing lines of /proc/cpuinfo. 
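+ * The same value is passed to cpuinfo_linux_parse_multiline_file() together with the per-line callback at the bottom of this file.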
+ * This is also the limit on the length of a single line. + */ +#define BUFFER_SIZE 1024 + + +static uint32_t parse_processor_number( + const char* processor_start, + const char* processor_end) +{ + const size_t processor_length = (size_t) (processor_end - processor_start); + + if (processor_length == 0) { + cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty"); + return 0; + } + + uint32_t processor_number = 0; + for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); + if (digit > 10) { + cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored", + (int) (processor_end - digit_ptr), digit_ptr); + break; + } + + processor_number = processor_number * 10 + digit; + } + + return processor_number; +} + +/* + * Full list of ARM features reported in /proc/cpuinfo: + * + * * swp - support for SWP instruction (deprecated in ARMv7, can be removed in future) + * * half - support for half-word loads and stores. These instruction are part of ARMv4, + * so no need to check it on supported CPUs. + * * thumb - support for 16-bit Thumb instruction set. Note that BX instruction is detected + * by ARMv4T architecture, not by this flag. + * * 26bit - old CPUs merged 26-bit PC and program status register (flags) into 32-bit PC + * and had special instructions for working with packed PC. Now it is all deprecated. + * * fastmult - most old ARM CPUs could only compute 2 bits of multiplication result per clock + * cycle, but CPUs with M suffix (e.g. ARM7TDMI) could compute 4 bits per cycle. + * Of course, now it makes no sense. + * * fpa - floating point accelerator available. On original ARM ABI all floating-point operations + * generated FPA instructions. If FPA was not available, these instructions generated + * "illegal operation" interrupts, and the OS processed them by emulating the FPA instructions. + * Debian used this ABI before it switched to EABI. Now FPA is deprecated. + * * vfp - vector floating point instructions. Available on most modern CPUs (as part of VFPv3). + * Required by Android ARMv7A ABI and by Ubuntu on ARM. + * Note: there is no flag for VFPv2. + * * edsp - V5E instructions: saturating add/sub and 16-bit x 16-bit -> 32/64-bit multiplications. + * Required on Android, supported by all CPUs in production. + * * java - Jazelle extension. Supported on most CPUs. + * * iwmmxt - Intel/Marvell Wireless MMX instructions. 64-bit integer SIMD. + * Supported on XScale (Since PXA270) and Sheeva (PJ1, PJ4) architectures. + * Note that there is no flag for WMMX2 instructions. + * * crunch - Maverick Crunch instructions. Junk. + * * thumbee - ThumbEE instructions. Almost no documentation is available. + * * neon - NEON instructions (aka Advanced SIMD). MVFR1 register gives more + * fine-grained information on particular supported features, but + * the Linux kernel exports only a single flag for all of them. + * According to ARMv7A docs it also implies the availability of VFPv3 + * (with 32 double-precision registers d0-d31). + * * vfpv3 - VFPv3 instructions. Available on most modern CPUs. Augment VFPv2 by + * conversion to/from integers and load constant instructions. + * Required by Android ARMv7A ABI and by Ubuntu on ARM. + * * vfpv3d16 - VFPv3 instructions with only 16 double-precision registers (d0-d15). + * * tls - software thread ID registers. + * Used by kernel (and likely libc) for efficient implementation of TLS. 
+ * * vfpv4 - fused multiply-add instructions. + * * idiva - DIV instructions available in ARM mode. + * * idivt - DIV instructions available in Thumb mode. + * * vfpd32 - VFP (of any version) with 32 double-precision registers d0-d31. + * * lpae - Large Physical Address Extension (physical address up to 40 bits). + * * evtstrm - generation of Event Stream by timer. + * * aes - AES instructions. + * * pmull - Polinomial Multiplication instructions. + * * sha1 - SHA1 instructions. + * * sha2 - SHA2 instructions. + * * crc32 - CRC32 instructions. + * + * /proc/cpuinfo on ARM is populated in file arch/arm/kernel/setup.c in Linux kernel + * Note that some devices may use patched Linux kernels with different feature names. + * However, the names above were checked on a large number of /proc/cpuinfo listings. + */ +static void parse_features( + const char* features_start, + const char* features_end, + struct cpuinfo_arm_linux_processor processor[restrict static 1]) +{ + const char* feature_start = features_start; + const char* feature_end; + + /* Mark the features as valid */ + processor->flags |= CPUINFO_ARM_LINUX_VALID_FEATURES | CPUINFO_ARM_LINUX_VALID_PROCESSOR; + + do { + feature_end = feature_start + 1; + for (; feature_end != features_end; feature_end++) { + if (*feature_end == ' ') { + break; + } + } + const size_t feature_length = (size_t) (feature_end - feature_start); + + switch (feature_length) { + case 2: + if (memcmp(feature_start, "fp", feature_length) == 0) { +#if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FP; +#endif +#if CPUINFO_ARCH_ARM + } else if (memcmp(feature_start, "wp", feature_length) == 0) { + /* + * Some AArch64 kernels, including the one on Nexus 5X, + * erroneously report "swp" as "wp" to AArch32 programs + */ + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP; +#endif + } else { + goto unexpected; + } + break; + case 3: + if (memcmp(feature_start, "aes", feature_length) == 0) { + #if CPUINFO_ARCH_ARM + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_AES; + #elif CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_AES; + #endif +#if CPUINFO_ARCH_ARM + } else if (memcmp(feature_start, "swp", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SWP; + } else if (memcmp(feature_start, "fpa", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPA; + } else if (memcmp(feature_start, "vfp", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFP; + } else if (memcmp(feature_start, "tls", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS; +#endif /* CPUINFO_ARCH_ARM */ + } else { + goto unexpected; + } + break; + case 4: + if (memcmp(feature_start, "sha1", feature_length) == 0) { + #if CPUINFO_ARCH_ARM + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA1; + #elif CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA1; + #endif + } else if (memcmp(feature_start, "sha2", feature_length) == 0) { + #if CPUINFO_ARCH_ARM + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_SHA2; + #elif CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_SHA2; + #endif + } else if (memcmp(feature_start, "fphp", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FPHP; + #endif + } else if (memcmp(feature_start, "fcma", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FCMA; + #endif +#if CPUINFO_ARCH_ARM + } 
else if (memcmp(feature_start, "half", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_HALF; + } else if (memcmp(feature_start, "edsp", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_EDSP; + } else if (memcmp(feature_start, "java", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_JAVA; + } else if (memcmp(feature_start, "neon", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_NEON; + } else if (memcmp(feature_start, "lpae", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_LPAE; + } else if (memcmp(feature_start, "tlsi", feature_length) == 0) { + /* + * Some AArch64 kernels, including the one on Nexus 5X, + * erroneously report "tls" as "tlsi" to AArch32 programs + */ + processor->features |= CPUINFO_ARM_LINUX_FEATURE_TLS; +#endif /* CPUINFO_ARCH_ARM */ + } else { + goto unexpected; + } + break; + case 5: + if (memcmp(feature_start, "pmull", feature_length) == 0) { + #if CPUINFO_ARCH_ARM + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_PMULL; + #elif CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_PMULL; + #endif + } else if (memcmp(feature_start, "crc32", feature_length) == 0) { + #if CPUINFO_ARCH_ARM + processor->features2 |= CPUINFO_ARM_LINUX_FEATURE2_CRC32; + #elif CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRC32; + #endif + } else if (memcmp(feature_start, "asimd", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMD; + #endif + } else if (memcmp(feature_start, "cpuid", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_CPUID; + #endif + } else if (memcmp(feature_start, "jscvt", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_JSCVT; + #endif + } else if (memcmp(feature_start, "lrcpc", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_LRCPC; + #endif +#if CPUINFO_ARCH_ARM + } else if (memcmp(feature_start, "thumb", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMB; + } else if (memcmp(feature_start, "26bit", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_26BIT; + } else if (memcmp(feature_start, "vfpv3", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3; + } else if (memcmp(feature_start, "vfpv4", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV4; + } else if (memcmp(feature_start, "idiva", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVA; + } else if (memcmp(feature_start, "idivt", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_IDIVT; +#endif /* CPUINFO_ARCH_ARM */ + } else { + goto unexpected; + } + break; +#if CPUINFO_ARCH_ARM + case 6: + if (memcmp(feature_start, "iwmmxt", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_IWMMXT; + } else if (memcmp(feature_start, "crunch", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_CRUNCH; + } else if (memcmp(feature_start, "vfpd32", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPD32; + } else { + goto unexpected; + } + break; +#endif /* CPUINFO_ARCH_ARM */ + case 7: + if (memcmp(feature_start, "evtstrm", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_EVTSTRM; + } else if 
(memcmp(feature_start, "atomics", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ATOMICS; + #endif + } else if (memcmp(feature_start, "asimdhp", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDHP; + #endif +#if CPUINFO_ARCH_ARM + } else if (memcmp(feature_start, "thumbee", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_THUMBEE; +#endif /* CPUINFO_ARCH_ARM */ + } else { + goto unexpected; + } + break; + case 8: + if (memcmp(feature_start, "asimdrdm", feature_length) == 0) { + #if CPUINFO_ARCH_ARM64 + processor->features |= CPUINFO_ARM_LINUX_FEATURE_ASIMDRDM; + #endif +#if CPUINFO_ARCH_ARM + } else if (memcmp(feature_start, "fastmult", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_FASTMULT; + } else if (memcmp(feature_start, "vfpv3d16", feature_length) == 0) { + processor->features |= CPUINFO_ARM_LINUX_FEATURE_VFPV3D16; +#endif /* CPUINFO_ARCH_ARM */ + } else { + goto unexpected; + } + break; + default: + unexpected: + cpuinfo_log_warning("unexpected /proc/cpuinfo feature \"%.*s\" is ignored", + (int) feature_length, feature_start); + break; + } + feature_start = feature_end; + for (; feature_start != features_end; feature_start++) { + if (*feature_start != ' ') { + break; + } + } + } while (feature_start != feature_end); +} + +static void parse_cpu_architecture( + const char* cpu_architecture_start, + const char* cpu_architecture_end, + struct cpuinfo_arm_linux_processor processor[restrict static 1]) +{ + const size_t cpu_architecture_length = (size_t) (cpu_architecture_end - cpu_architecture_start); + /* Early AArch64 kernels report "CPU architecture: AArch64" instead of a numeric value 8 */ + if (cpu_architecture_length == 7) { + if (memcmp(cpu_architecture_start, "AArch64", cpu_architecture_length) == 0) { + processor->midr = midr_set_architecture(processor->midr, UINT32_C(0xF)); + processor->architecture_version = 8; + processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR; + return; + } + } + + + uint32_t architecture = 0; + const char* cpu_architecture_ptr = cpu_architecture_start; + for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) { + const uint32_t digit = (*cpu_architecture_ptr) - '0'; + + /* Verify that CPU architecture is a decimal number */ + if (digit >= 10) { + break; + } + + architecture = architecture * 10 + digit; + } + + if (cpu_architecture_ptr == cpu_architecture_start) { + cpuinfo_log_warning("CPU architecture %.*s in /proc/cpuinfo is ignored due to non-digit at the beginning of the string", + (int) cpu_architecture_length, cpu_architecture_start); + } else { + if (architecture != 0) { + processor->architecture_version = architecture; + processor->flags |= CPUINFO_ARM_LINUX_VALID_ARCHITECTURE | CPUINFO_ARM_LINUX_VALID_PROCESSOR; + + for (; cpu_architecture_ptr != cpu_architecture_end; cpu_architecture_ptr++) { + const char feature = *cpu_architecture_ptr; + switch (feature) { +#if CPUINFO_ARCH_ARM + case 'T': + processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_T; + break; + case 'E': + processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_E; + break; + case 'J': + processor->architecture_flags |= CPUINFO_ARM_LINUX_ARCH_J; + break; +#endif /* CPUINFO_ARCH_ARM */ + case ' ': + case '\t': + /* Ignore whitespace at the end */ + break; + default: + cpuinfo_log_warning("skipped unknown architectural feature '%c' for ARMv%"PRIu32, + 
feature, architecture); + break; + } + } + } else { + cpuinfo_log_warning("CPU architecture %.*s in /proc/cpuinfo is ignored due to invalid value (0)", + (int) cpu_architecture_length, cpu_architecture_start); + } + } + + uint32_t midr_architecture = UINT32_C(0xF); +#if CPUINFO_ARCH_ARM + switch (processor->architecture_version) { + case 6: + midr_architecture = UINT32_C(0x7); /* ARMv6 */ + break; + case 5: + if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TEJ) == CPUINFO_ARM_LINUX_ARCH_TEJ) { + midr_architecture = UINT32_C(0x6); /* ARMv5TEJ */ + } else if ((processor->architecture_flags & CPUINFO_ARM_LINUX_ARCH_TE) == CPUINFO_ARM_LINUX_ARCH_TE) { + midr_architecture = UINT32_C(0x5); /* ARMv5TE */ + } else { + midr_architecture = UINT32_C(0x4); /* ARMv5T */ + } + break; + } +#endif + processor->midr = midr_set_architecture(processor->midr, midr_architecture); +} + +static void parse_cpu_part( + const char* cpu_part_start, + const char* cpu_part_end, + struct cpuinfo_arm_linux_processor processor[restrict static 1]) +{ + const size_t cpu_part_length = (size_t) (cpu_part_end - cpu_part_start); + + /* + * CPU part should contain hex prefix (0x) and one to three hex digits. + * I have never seen less than three digits as a value of this field, + * but I don't think it is impossible to see such values in future. + * Value can not contain more than three hex digits since + * Main ID Register (MIDR) assigns only a 12-bit value for CPU part. + */ + if (cpu_part_length < 3 || cpu_part_length > 5) { + cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_part_length, cpu_part_start, cpu_part_length); + return; + } + + /* Verify the presence of hex prefix */ + if (cpu_part_start[0] != '0' || cpu_part_start[1] != 'x') { + cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_part_length, cpu_part_start); + return; + } + + /* Verify that characters after hex prefix are hexadecimal digits and decode them */ + uint32_t cpu_part = 0; + for (const char* digit_ptr = cpu_part_start + 2; digit_ptr != cpu_part_end; digit_ptr++) { + const char digit_char = *digit_ptr; + uint32_t digit; + if (digit_char >= '0' && digit_char <= '9') { + digit = digit_char - '0'; + } else if ((uint32_t) (digit_char - 'A') < 6) { + digit = 10 + (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + digit = 10 + (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU part %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character %c at offset %zu", + (int) cpu_part_length, cpu_part_start, digit_char, (size_t) (digit_ptr - cpu_part_start)); + return; + } + cpu_part = cpu_part * 16 + digit; + } + + processor->midr = midr_set_part(processor->midr, cpu_part); + processor->flags |= CPUINFO_ARM_LINUX_VALID_PART | CPUINFO_ARM_LINUX_VALID_PROCESSOR; +} + +static void parse_cpu_implementer( + const char* cpu_implementer_start, + const char* cpu_implementer_end, + struct cpuinfo_arm_linux_processor processor[restrict static 1]) +{ + const size_t cpu_implementer_length = cpu_implementer_end - cpu_implementer_start; + + /* + * Value should contain hex prefix (0x) and one or two hex digits. + * I have never seen single hex digit as a value of this field, + * but I don't think it is impossible in future. + * Value can not contain more than two hex digits since + * Main ID Register (MIDR) assigns only an 8-bit value for CPU implementer. 
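+	 * For example, cores designed by ARM Ltd. report this field as 0x41 (ASCII 'A').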
+ */ + switch (cpu_implementer_length) { + case 3: + case 4: + break; + default: + cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_implementer_length, cpu_implementer_start, cpu_implementer_length); + return; + } + + /* Verify the presence of hex prefix */ + if (cpu_implementer_start[0] != '0' || cpu_implementer_start[1] != 'x') { + cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_implementer_length, cpu_implementer_start); + return; + } + + /* Verify that characters after hex prefix are hexadecimal digits and decode them */ + uint32_t cpu_implementer = 0; + for (const char* digit_ptr = cpu_implementer_start + 2; digit_ptr != cpu_implementer_end; digit_ptr++) { + const char digit_char = *digit_ptr; + uint32_t digit; + if (digit_char >= '0' && digit_char <= '9') { + digit = digit_char - '0'; + } else if ((uint32_t) (digit_char - 'A') < 6) { + digit = 10 + (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + digit = 10 + (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU implementer %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c' at offset %zu", + (int) cpu_implementer_length, cpu_implementer_start, digit_char, (size_t) (digit_ptr - cpu_implementer_start)); + return; + } + cpu_implementer = cpu_implementer * 16 + digit; + } + + processor->midr = midr_set_implementer(processor->midr, cpu_implementer); + processor->flags |= CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PROCESSOR; +} + +static void parse_cpu_variant( + const char* cpu_variant_start, + const char* cpu_variant_end, + struct cpuinfo_arm_linux_processor processor[restrict static 1]) +{ + const size_t cpu_variant_length = cpu_variant_end - cpu_variant_start; + + /* + * Value should contain hex prefix (0x) and one hex digit. + * Value can not contain more than one hex digits since + * Main ID Register (MIDR) assigns only a 4-bit value for CPU variant. + */ + if (cpu_variant_length != 3) { + cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected length (%zu)", + (int) cpu_variant_length, cpu_variant_start, cpu_variant_length); + return; + } + + /* Skip if there is no hex prefix (0x) */ + if (cpu_variant_start[0] != '0' || cpu_variant_start[1] != 'x') { + cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to lack of 0x prefix", + (int) cpu_variant_length, cpu_variant_start); + return; + } + + /* Check if the value after hex prefix is indeed a hex digit and decode it. 
*/ + const char digit_char = cpu_variant_start[2]; + uint32_t cpu_variant; + if ((uint32_t) (digit_char - '0') < 10) { + cpu_variant = (uint32_t) (digit_char - '0'); + } else if ((uint32_t) (digit_char - 'A') < 6) { + cpu_variant = 10 + (uint32_t) (digit_char - 'A'); + } else if ((uint32_t) (digit_char - 'a') < 6) { + cpu_variant = 10 + (uint32_t) (digit_char - 'a'); + } else { + cpuinfo_log_warning("CPU variant %.*s in /proc/cpuinfo is ignored due to unexpected non-hex character '%c'", + (int) cpu_variant_length, cpu_variant_start, digit_char); + return; + } + + processor->midr = midr_set_variant(processor->midr, cpu_variant); + processor->flags |= CPUINFO_ARM_LINUX_VALID_VARIANT | CPUINFO_ARM_LINUX_VALID_PROCESSOR; +} + +static void parse_cpu_revision( + const char* cpu_revision_start, + const char* cpu_revision_end, + struct cpuinfo_arm_linux_processor processor[restrict static 1]) +{ + uint32_t cpu_revision = 0; + for (const char* digit_ptr = cpu_revision_start; digit_ptr != cpu_revision_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); + + /* Verify that the character in CPU revision is a decimal digit */ + if (digit >= 10) { + cpuinfo_log_warning("CPU revision %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + (int) (cpu_revision_end - cpu_revision_start), cpu_revision_start, + *digit_ptr, (size_t) (digit_ptr - cpu_revision_start)); + return; + } + + cpu_revision = cpu_revision * 10 + digit; + } + + processor->midr = midr_set_revision(processor->midr, cpu_revision); + processor->flags |= CPUINFO_ARM_LINUX_VALID_REVISION | CPUINFO_ARM_LINUX_VALID_PROCESSOR; +} + +#if CPUINFO_ARCH_ARM +/* + * Decode one of the cache-related numbers reported by Linux kernel + * for pre-ARMv7 architecture. 
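+ * The decoded values are later used in init.c to override the default L1 instruction and data cache parameters.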
+ * An example cache-related information in /proc/cpuinfo: + * + * I size : 32768 + * I assoc : 4 + * I line length : 32 + * I sets : 256 + * D size : 16384 + * D assoc : 4 + * D line length : 32 + * D sets : 128 + * + */ +static void parse_cache_number( + const char* number_start, + const char* number_end, + const char* number_name, + uint32_t number_ptr[restrict static 1], + uint32_t flags[restrict static 1], + uint32_t number_mask) +{ + uint32_t number = 0; + for (const char* digit_ptr = number_start; digit_ptr != number_end; digit_ptr++) { + const uint32_t digit = *digit_ptr - '0'; + if (digit >= 10) { + cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + number_name, (int) (number_end - number_start), number_start, + *digit_ptr, (size_t) (digit_ptr - number_start)); + return; + } + + number = number * 10 + digit; + } + + if (number == 0) { + cpuinfo_log_warning("%s %.*s in /proc/cpuinfo is ignored due to invalid value of zero reported by the kernel", + number_name, (int) (number_end - number_start), number_start); + } + + /* If the number specifies a cache line size, verify that is a reasonable power of 2 */ + if (number_mask & CPUINFO_ARM_LINUX_VALID_CACHE_LINE) { + switch (number) { + case 16: + case 32: + case 64: + case 128: + break; + default: + cpuinfo_log_warning("invalid %s %.*s is ignored: a value of 16, 32, 64, or 128 expected", + number_name, (int) (number_end - number_start), number_start); + } + } + + *number_ptr = number; + *flags |= number_mask | CPUINFO_ARM_LINUX_VALID_PROCESSOR; +} +#endif /* CPUINFO_ARCH_ARM */ + +struct proc_cpuinfo_parser_state { + char* hardware; + char* revision; + uint32_t processor_index; + uint32_t max_processors_count; + struct cpuinfo_arm_linux_processor* processors; + struct cpuinfo_arm_linux_processor dummy_processor; +}; + +/* + * Decode a single line of /proc/cpuinfo information. + * Lines have format [ ]*:[ ] + * An example of /proc/cpuinfo (from Pandaboard-ES): + * + * Processor : ARMv7 Processor rev 10 (v7l) + * processor : 0 + * BogoMIPS : 1392.74 + * + * processor : 1 + * BogoMIPS : 1363.33 + * + * Features : swp half thumb fastmult vfp edsp thumbee neon vfpv3 + * CPU implementer : 0x41 + * CPU architecture: 7 + * CPU variant : 0x2 + * CPU part : 0xc09 + * CPU revision : 10 + * + * Hardware : OMAP4 Panda board + * Revision : 0020 + * Serial : 0000000000000000 + */ +static bool parse_line( + const char* line_start, + const char* line_end, + struct proc_cpuinfo_parser_state state[restrict static 1], + uint64_t line_number) +{ + /* Empty line. Skip. */ + if (line_start == line_end) { + return true; + } + + /* Search for ':' on the line. */ + const char* separator = line_start; + for (; separator != line_end; separator++) { + if (*separator == ':') { + break; + } + } + /* Skip line if no ':' separator was found. */ + if (separator == line_end) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip trailing spaces in key part. */ + const char* key_end = separator; + for (; key_end != line_start; key_end--) { + if (key_end[-1] != ' ' && key_end[-1] != '\t') { + break; + } + } + /* Skip line if key contains nothing but spaces. */ + if (key_end == line_start) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip leading spaces in value part. 
*/ + const char* value_start = separator + 1; + for (; value_start != line_end; value_start++) { + if (*value_start != ' ') { + break; + } + } + /* Value part contains nothing but spaces. Skip line. */ + if (value_start == line_end) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip trailing spaces in value part (if any) */ + const char* value_end = line_end; + for (; value_end != value_start; value_end--) { + if (value_end[-1] != ' ') { + break; + } + } + + const uint32_t processor_index = state->processor_index; + const uint32_t max_processors_count = state->max_processors_count; + struct cpuinfo_arm_linux_processor* processors = state->processors; + struct cpuinfo_arm_linux_processor* processor = &state->dummy_processor; + if (processor_index < max_processors_count) { + processor = &processors[processor_index]; + } + + const size_t key_length = key_end - line_start; + switch (key_length) { + case 6: + if (memcmp(line_start, "Serial", key_length) == 0) { + /* Usually contains just zeros, useless */ +#if CPUINFO_ARCH_ARM + } else if (memcmp(line_start, "I size", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache size", &processor->proc_cpuinfo_cache.i_size, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SIZE); + } else if (memcmp(line_start, "I sets", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache sets", &processor->proc_cpuinfo_cache.i_sets, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_SETS); + } else if (memcmp(line_start, "D size", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache size", &processor->proc_cpuinfo_cache.d_size, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SIZE); + } else if (memcmp(line_start, "D sets", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache sets", &processor->proc_cpuinfo_cache.d_sets, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_SETS); +#endif /* CPUINFO_ARCH_ARM */ + } else { + goto unknown; + } + break; +#if CPUINFO_ARCH_ARM + case 7: + if (memcmp(line_start, "I assoc", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache associativity", &processor->proc_cpuinfo_cache.i_assoc, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_WAYS); + } else if (memcmp(line_start, "D assoc", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache associativity", &processor->proc_cpuinfo_cache.d_assoc, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_WAYS); + } else { + goto unknown; + } + break; +#endif /* CPUINFO_ARCH_ARM */ + case 8: + if (memcmp(line_start, "CPU part", key_length) == 0) { + parse_cpu_part(value_start, value_end, processor); + } else if (memcmp(line_start, "Features", key_length) == 0) { + parse_features(value_start, value_end, processor); + } else if (memcmp(line_start, "BogoMIPS", key_length) == 0) { + /* BogoMIPS is useless, don't parse */ + } else if (memcmp(line_start, "Hardware", key_length) == 0) { + size_t value_length = value_end - value_start; + if (value_length > CPUINFO_HARDWARE_VALUE_MAX) { + cpuinfo_log_info( + "length of Hardware value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit", + (int) value_length, value_start, CPUINFO_HARDWARE_VALUE_MAX); + value_length = CPUINFO_HARDWARE_VALUE_MAX; + } else { + state->hardware[value_length] = '\0'; + } + memcpy(state->hardware, value_start, 
value_length); + cpuinfo_log_debug("parsed /proc/cpuinfo Hardware = \"%.*s\"", (int) value_length, value_start); + } else if (memcmp(line_start, "Revision", key_length) == 0) { + size_t value_length = value_end - value_start; + if (value_length > CPUINFO_REVISION_VALUE_MAX) { + cpuinfo_log_info( + "length of Revision value \"%.*s\" in /proc/cpuinfo exceeds limit (%d): truncating to the limit", + (int) value_length, value_start, CPUINFO_REVISION_VALUE_MAX); + value_length = CPUINFO_REVISION_VALUE_MAX; + } else { + state->revision[value_length] = '\0'; + } + memcpy(state->revision, value_start, value_length); + cpuinfo_log_debug("parsed /proc/cpuinfo Revision = \"%.*s\"", (int) value_length, value_start); + } else { + goto unknown; + } + break; + case 9: + if (memcmp(line_start, "processor", key_length) == 0) { + const uint32_t new_processor_index = parse_processor_number(value_start, value_end); + if (new_processor_index < processor_index) { + /* Strange: decreasing processor number */ + cpuinfo_log_warning( + "unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } else if (new_processor_index > processor_index + 1) { + /* Strange, but common: skipped processor $(processor_index + 1) */ + cpuinfo_log_info( + "unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } + if (new_processor_index < max_processors_count) { + /* Record that the processor was mentioned in /proc/cpuinfo */ + processors[new_processor_index].flags |= CPUINFO_ARM_LINUX_VALID_PROCESSOR; + } else { + /* Log and ignore processor */ + cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32, + new_processor_index, max_processors_count - 1); + } + state->processor_index = new_processor_index; + return true; + } else if (memcmp(line_start, "Processor", key_length) == 0) { + /* TODO: parse to fix misreported architecture, similar to Android's cpufeatures */ + } else { + goto unknown; + } + break; + case 11: + if (memcmp(line_start, "CPU variant", key_length) == 0) { + parse_cpu_variant(value_start, value_end, processor); + } else { + goto unknown; + } + break; + case 12: + if (memcmp(line_start, "CPU revision", key_length) == 0) { + parse_cpu_revision(value_start, value_end, processor); + } else { + goto unknown; + } + break; +#if CPUINFO_ARCH_ARM + case 13: + if (memcmp(line_start, "I line length", key_length) == 0) { + parse_cache_number(value_start, value_end, + "instruction cache line size", &processor->proc_cpuinfo_cache.i_line_length, + &processor->flags, CPUINFO_ARM_LINUX_VALID_ICACHE_LINE); + } else if (memcmp(line_start, "D line length", key_length) == 0) { + parse_cache_number(value_start, value_end, + "data cache line size", &processor->proc_cpuinfo_cache.d_line_length, + &processor->flags, CPUINFO_ARM_LINUX_VALID_DCACHE_LINE); + } else { + goto unknown; + } + break; +#endif /* CPUINFO_ARCH_ARM */ + case 15: + if (memcmp(line_start, "CPU implementer", key_length) == 0) { + parse_cpu_implementer(value_start, value_end, processor); + } else if (memcmp(line_start, "CPU implementor", key_length) == 0) { + parse_cpu_implementer(value_start, value_end, processor); + } else { + goto unknown; + } + break; + case 16: + if (memcmp(line_start, "CPU architecture", key_length) == 0) { + parse_cpu_architecture(value_start, value_end, processor); + } else { + goto unknown; + } + break; + default: + unknown: + 
cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start); + + } + return true; +} + +bool cpuinfo_arm_linux_parse_proc_cpuinfo( + char hardware[restrict static CPUINFO_HARDWARE_VALUE_MAX], + char revision[restrict static CPUINFO_REVISION_VALUE_MAX], + uint32_t max_processors_count, + struct cpuinfo_arm_linux_processor processors[restrict static max_processors_count]) +{ + struct proc_cpuinfo_parser_state state = { + .hardware = hardware, + .revision = revision, + .processor_index = 0, + .max_processors_count = max_processors_count, + .processors = processors, + }; + return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, + (cpuinfo_line_callback) parse_line, &state); +} diff --git a/dep/cpuinfo/src/arm/linux/hwcap.c b/dep/cpuinfo/src/arm/linux/hwcap.c new file mode 100644 index 000000000..35e9994f6 --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/hwcap.c @@ -0,0 +1,159 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#if CPUINFO_MOCK + #include +#endif +#include +#include +#include + +#if CPUINFO_ARCH_ARM64 || CPUINFO_ARCH_ARM && !defined(__ANDROID__) + #include +#else + #define AT_HWCAP 16 + #define AT_HWCAP2 26 +#endif + + +#if CPUINFO_MOCK + static uint32_t mock_hwcap = 0; + void cpuinfo_set_hwcap(uint32_t hwcap) { + mock_hwcap = hwcap; + } + + static uint32_t mock_hwcap2 = 0; + void cpuinfo_set_hwcap2(uint32_t hwcap2) { + mock_hwcap2 = hwcap2; + } +#endif + + +#if CPUINFO_ARCH_ARM + typedef unsigned long (*getauxval_function_t)(unsigned long); + + bool cpuinfo_arm_linux_hwcap_from_getauxval( + uint32_t hwcap[restrict static 1], + uint32_t hwcap2[restrict static 1]) + { + #if CPUINFO_MOCK + *hwcap = mock_hwcap; + *hwcap2 = mock_hwcap2; + return true; + #elif defined(__ANDROID__) + /* Android: dynamically check if getauxval is supported */ + void* libc = NULL; + getauxval_function_t getauxval = NULL; + + dlerror(); + libc = dlopen("libc.so", RTLD_LAZY); + if (libc == NULL) { + cpuinfo_log_warning("failed to load libc.so: %s", dlerror()); + goto cleanup; + } + + getauxval = (getauxval_function_t) dlsym(libc, "getauxval"); + if (getauxval == NULL) { + cpuinfo_log_info("failed to locate getauxval in libc.so: %s", dlerror()); + goto cleanup; + } + + *hwcap = getauxval(AT_HWCAP); + *hwcap2 = getauxval(AT_HWCAP2); + + cleanup: + if (libc != NULL) { + dlclose(libc); + libc = NULL; + } + return getauxval != NULL; + #else + /* GNU/Linux: getauxval is always supported */ + *hwcap = getauxval(AT_HWCAP); + *hwcap2 = getauxval(AT_HWCAP2); + return true; + #endif + } + + #ifdef __ANDROID__ + bool cpuinfo_arm_linux_hwcap_from_procfs( + uint32_t hwcap[restrict static 1], + uint32_t hwcap2[restrict static 1]) + { + #if CPUINFO_MOCK + *hwcap = mock_hwcap; + *hwcap2 = mock_hwcap2; + return true; + #else + uint32_t hwcaps[2] = { 0, 0 }; + bool result = false; + int file = -1; + + file = open("/proc/self/auxv", O_RDONLY); + if (file == -1) { + cpuinfo_log_warning("failed to open /proc/self/auxv: %s", strerror(errno)); + goto cleanup; + } + + ssize_t bytes_read; + do { + Elf32_auxv_t elf_auxv; + bytes_read = read(file, &elf_auxv, sizeof(Elf32_auxv_t)); + if (bytes_read < 0) { + cpuinfo_log_warning("failed to read /proc/self/auxv: %s", strerror(errno)); + goto cleanup; + } else if (bytes_read > 0) { + if (bytes_read == sizeof(elf_auxv)) { + switch (elf_auxv.a_type) { + case AT_HWCAP: + hwcaps[0] = (uint32_t) elf_auxv.a_un.a_val; + break; + case AT_HWCAP2: + hwcaps[1] = (uint32_t) elf_auxv.a_un.a_val; + break; + } + } else { + 
cpuinfo_log_warning( + "failed to read %zu bytes from /proc/self/auxv: %zu bytes available", + sizeof(elf_auxv), (size_t) bytes_read); + goto cleanup; + } + } + } while (bytes_read == sizeof(Elf32_auxv_t)); + + /* Success, commit results */ + *hwcap = hwcaps[0]; + *hwcap2 = hwcaps[1]; + result = true; + + cleanup: + if (file != -1) { + close(file); + file = -1; + } + return result; + #endif + } + #endif /* __ANDROID__ */ +#elif CPUINFO_ARCH_ARM64 + void cpuinfo_arm_linux_hwcap_from_getauxval( + uint32_t hwcap[restrict static 1], + uint32_t hwcap2[restrict static 1]) + { + #if CPUINFO_MOCK + *hwcap = mock_hwcap; + *hwcap2 = mock_hwcap2; + #else + *hwcap = (uint32_t) getauxval(AT_HWCAP); + *hwcap2 = (uint32_t) getauxval(AT_HWCAP2); + return ; + #endif + } +#endif diff --git a/dep/cpuinfo/src/arm/linux/init.c b/dep/cpuinfo/src/arm/linux/init.c new file mode 100644 index 000000000..23d843996 --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/init.c @@ -0,0 +1,765 @@ +#include +#include +#include +#include + +#include +#include +#if defined(__ANDROID__) + #include +#endif +#include +#include +#include +#include +#include + + +struct cpuinfo_arm_isa cpuinfo_isa = { 0 }; + +static struct cpuinfo_package package = { { 0 } }; + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + +static inline uint32_t min(uint32_t a, uint32_t b) { + return a < b ? a : b; +} + +static inline int cmp(uint32_t a, uint32_t b) { + return (a > b) - (a < b); +} + +static bool cluster_siblings_parser( + uint32_t processor, uint32_t siblings_start, uint32_t siblings_end, + struct cpuinfo_arm_linux_processor* processors) +{ + processors[processor].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + uint32_t package_leader_id = processors[processor].package_leader_id; + + for (uint32_t sibling = siblings_start; sibling < siblings_end; sibling++) { + if (!bitmask_all(processors[sibling].flags, CPUINFO_LINUX_FLAG_VALID)) { + cpuinfo_log_info("invalid processor %"PRIu32" reported as a sibling for processor %"PRIu32, + sibling, processor); + continue; + } + + const uint32_t sibling_package_leader_id = processors[sibling].package_leader_id; + if (sibling_package_leader_id < package_leader_id) { + package_leader_id = sibling_package_leader_id; + } + + processors[sibling].package_leader_id = package_leader_id; + processors[sibling].flags |= CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER; + } + + processors[processor].package_leader_id = package_leader_id; + + return true; +} + +static int cmp_arm_linux_processor(const void* ptr_a, const void* ptr_b) { + const struct cpuinfo_arm_linux_processor* processor_a = (const struct cpuinfo_arm_linux_processor*) ptr_a; + const struct cpuinfo_arm_linux_processor* processor_b = (const struct cpuinfo_arm_linux_processor*) ptr_b; + + /* Move usable processors towards the start of the array */ + const bool usable_a = bitmask_all(processor_a->flags, CPUINFO_LINUX_FLAG_VALID); + const bool usable_b = bitmask_all(processor_b->flags, CPUINFO_LINUX_FLAG_VALID); + if (usable_a != usable_b) { + return (int) usable_b - (int) usable_a; + } + + /* Compare based on core type (e.g. Cortex-A57 < Cortex-A53) */ + const uint32_t midr_a = processor_a->midr; + const uint32_t midr_b = processor_b->midr; + if (midr_a != midr_b) { + const uint32_t score_a = midr_score_core(midr_a); + const uint32_t score_b = midr_score_core(midr_b); + if (score_a != score_b) { + return score_a > score_b ? -1 : 1; + } + } + + /* Compare based on core frequency (e.g. 
2.0 GHz < 1.2 GHz) */ + const uint32_t frequency_a = processor_a->max_frequency; + const uint32_t frequency_b = processor_b->max_frequency; + if (frequency_a != frequency_b) { + return frequency_a > frequency_b ? -1 : 1; + } + + /* Compare based on cluster leader id (i.e. cluster 1 < cluster 0) */ + const uint32_t cluster_a = processor_a->package_leader_id; + const uint32_t cluster_b = processor_b->package_leader_id; + if (cluster_a != cluster_b) { + return cluster_a > cluster_b ? -1 : 1; + } + + /* Compare based on system processor id (i.e. processor 0 < processor 1) */ + const uint32_t id_a = processor_a->system_processor_id; + const uint32_t id_b = processor_b->system_processor_id; + return cmp(id_a, id_b); +} + +void cpuinfo_arm_linux_init(void) { + struct cpuinfo_arm_linux_processor* arm_linux_processors = NULL; + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + uint32_t* linux_cpu_to_uarch_index_map = NULL; + + const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); + cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); + + const uint32_t max_possible_processors_count = 1 + + cpuinfo_linux_get_max_possible_processor(max_processors_count); + cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count); + const uint32_t max_present_processors_count = 1 + + cpuinfo_linux_get_max_present_processor(max_processors_count); + cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count); + + uint32_t valid_processor_mask = 0; + uint32_t arm_linux_processors_count = max_processors_count; + if (max_present_processors_count != 0) { + arm_linux_processors_count = min(arm_linux_processors_count, max_present_processors_count); + valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT; + } + if (max_possible_processors_count != 0) { + arm_linux_processors_count = min(arm_linux_processors_count, max_possible_processors_count); + valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE; + } + if ((max_present_processors_count | max_possible_processors_count) == 0) { + cpuinfo_log_error("failed to parse both lists of possible and present processors"); + return; + } + + arm_linux_processors = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_arm_linux_processor)); + if (arm_linux_processors == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" ARM logical processors", + arm_linux_processors_count * sizeof(struct cpuinfo_arm_linux_processor), + arm_linux_processors_count); + return; + } + + if (max_possible_processors_count) { + cpuinfo_linux_detect_possible_processors( + arm_linux_processors_count, &arm_linux_processors->flags, + sizeof(struct cpuinfo_arm_linux_processor), + CPUINFO_LINUX_FLAG_POSSIBLE); + } + + if (max_present_processors_count) { + cpuinfo_linux_detect_present_processors( + arm_linux_processors_count, &arm_linux_processors->flags, + sizeof(struct cpuinfo_arm_linux_processor), + CPUINFO_LINUX_FLAG_PRESENT); + } + +#if defined(__ANDROID__) + struct cpuinfo_android_properties android_properties; + 
cpuinfo_arm_android_parse_properties(&android_properties); +#else + char proc_cpuinfo_hardware[CPUINFO_HARDWARE_VALUE_MAX]; +#endif + char proc_cpuinfo_revision[CPUINFO_REVISION_VALUE_MAX]; + + if (!cpuinfo_arm_linux_parse_proc_cpuinfo( +#if defined(__ANDROID__) + android_properties.proc_cpuinfo_hardware, +#else + proc_cpuinfo_hardware, +#endif + proc_cpuinfo_revision, + arm_linux_processors_count, + arm_linux_processors)) { + cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo"); + return; + } + + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, valid_processor_mask)) { + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; + cpuinfo_log_debug("parsed processor %"PRIu32" MIDR 0x%08"PRIx32, + i, arm_linux_processors[i].midr); + } + } + + uint32_t valid_processors = 0, last_midr = 0; + #if CPUINFO_ARCH_ARM + uint32_t last_architecture_version = 0, last_architecture_flags = 0; + #endif + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + arm_linux_processors[i].system_processor_id = i; + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + valid_processors += 1; + + if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) { + /* + * Processor is in possible and present lists, but not reported in /proc/cpuinfo. + * This is fairly common: high-index processors can be not reported if they are offline. + */ + cpuinfo_log_info("processor %"PRIu32" is not listed in /proc/cpuinfo", i); + } + + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) { + last_midr = arm_linux_processors[i].midr; + } + #if CPUINFO_ARCH_ARM + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ARCHITECTURE)) { + last_architecture_version = arm_linux_processors[i].architecture_version; + last_architecture_flags = arm_linux_processors[i].architecture_flags; + } + #endif + } else { + /* Processor reported in /proc/cpuinfo, but not in possible and/or present lists: log and ignore */ + if (!(arm_linux_processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR)) { + cpuinfo_log_warning("invalid processor %"PRIu32" reported in /proc/cpuinfo", i); + } + } + } + +#if defined(__ANDROID__) + const struct cpuinfo_arm_chipset chipset = + cpuinfo_arm_android_decode_chipset(&android_properties, valid_processors, 0); +#else + const struct cpuinfo_arm_chipset chipset = + cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0); +#endif + + #if CPUINFO_ARCH_ARM + uint32_t isa_features = 0, isa_features2 = 0; + #ifdef __ANDROID__ + /* + * On Android before API 20, libc.so does not provide getauxval function. + * Thus, we try to dynamically find it, or use two fallback mechanisms: + * 1. dlopen libc.so, and try to find getauxval + * 2. Parse /proc/self/auxv procfs file + * 3. Use features reported in /proc/cpuinfo + */ + if (!cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2)) { + /* getauxval can't be used, fall back to parsing /proc/self/auxv */ + if (!cpuinfo_arm_linux_hwcap_from_procfs(&isa_features, &isa_features2)) { + /* + * Reading /proc/self/auxv failed, probably due to file permissions. + * Use information from /proc/cpuinfo to detect ISA. + * + * If different processors report different ISA features, take the intersection. 
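+					 * For example, if only some of the reporting cores list the "neon" flag, NEON is not assumed to be available.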
+ */ + uint32_t processors_with_features = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_FEATURES)) { + if (processors_with_features == 0) { + isa_features = arm_linux_processors[i].features; + isa_features2 = arm_linux_processors[i].features2; + } else { + isa_features &= arm_linux_processors[i].features; + isa_features2 &= arm_linux_processors[i].features2; + } + processors_with_features += 1; + } + } + } + } + #else + /* On GNU/Linux getauxval is always available */ + cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2); + #endif + cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( + isa_features, isa_features2, + last_midr, last_architecture_version, last_architecture_flags, + &chipset, &cpuinfo_isa); + #elif CPUINFO_ARCH_ARM64 + uint32_t isa_features = 0, isa_features2 = 0; + /* getauxval is always available on ARM64 Android */ + cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2); + cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( + isa_features, isa_features2, last_midr, &chipset, &cpuinfo_isa); + #endif + + /* Detect min/max frequency and package ID */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t max_frequency = cpuinfo_linux_get_processor_max_frequency(i); + if (max_frequency != 0) { + arm_linux_processors[i].max_frequency = max_frequency; + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY; + } + + const uint32_t min_frequency = cpuinfo_linux_get_processor_min_frequency(i); + if (min_frequency != 0) { + arm_linux_processors[i].min_frequency = min_frequency; + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_MIN_FREQUENCY; + } + + if (cpuinfo_linux_get_processor_package_id(i, &arm_linux_processors[i].package_id)) { + arm_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_PACKAGE_ID; + } + } + } + + /* Initialize topology group IDs */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + arm_linux_processors[i].package_leader_id = i; + } + + /* Propagate topology group IDs among siblings */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + continue; + } + + if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) { + cpuinfo_linux_detect_core_siblings( + arm_linux_processors_count, i, + (cpuinfo_siblings_callback) cluster_siblings_parser, + arm_linux_processors); + } + } + + /* Propagate all cluster IDs */ + uint32_t clustered_processors = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER)) { + clustered_processors += 1; + + const uint32_t package_leader_id = arm_linux_processors[i].package_leader_id; + if (package_leader_id < i) { + arm_linux_processors[i].package_leader_id = arm_linux_processors[package_leader_id].package_leader_id; + } + + cpuinfo_log_debug("processor %"PRIu32" clustered with processor %"PRIu32" as inferred from system siblings lists", + i, arm_linux_processors[i].package_leader_id); + } + } + + if (clustered_processors != valid_processors) { + /* + * Topology information about some or all logical processors may be unavailable, for the following reasons: + * - Linux kernel is too old, or configured without support for topology information in sysfs. 
+ * - Core is offline, and Linux kernel is configured to not report topology for offline cores. + * + * In this case, we assign processors to clusters using two methods: + * - Try heuristic cluster configurations (e.g. 6-core SoC usually has 4+2 big.LITTLE configuration). + * - If heuristic failed, assign processors to core clusters in a sequential scan. + */ + if (!cpuinfo_arm_linux_detect_core_clusters_by_heuristic(valid_processors, arm_linux_processors_count, arm_linux_processors)) { + cpuinfo_arm_linux_detect_core_clusters_by_sequential_scan(arm_linux_processors_count, arm_linux_processors); + } + } + + cpuinfo_arm_linux_count_cluster_processors(arm_linux_processors_count, arm_linux_processors); + + const uint32_t cluster_count = cpuinfo_arm_linux_detect_cluster_midr( + &chipset, + arm_linux_processors_count, valid_processors, arm_linux_processors); + + /* Initialize core vendor, uarch, MIDR, and frequency for every logical processor */ + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t cluster_leader = arm_linux_processors[i].package_leader_id; + if (cluster_leader == i) { + /* Cluster leader: decode core vendor and uarch */ + cpuinfo_arm_decode_vendor_uarch( + arm_linux_processors[cluster_leader].midr, +#if CPUINFO_ARCH_ARM + !!(arm_linux_processors[cluster_leader].features & CPUINFO_ARM_LINUX_FEATURE_VFPV4), +#endif + &arm_linux_processors[cluster_leader].vendor, + &arm_linux_processors[cluster_leader].uarch); + } else { + /* Cluster non-leader: copy vendor, uarch, MIDR, and frequency from cluster leader */ + arm_linux_processors[i].flags |= arm_linux_processors[cluster_leader].flags & + (CPUINFO_ARM_LINUX_VALID_MIDR | CPUINFO_LINUX_FLAG_MAX_FREQUENCY); + arm_linux_processors[i].midr = arm_linux_processors[cluster_leader].midr; + arm_linux_processors[i].vendor = arm_linux_processors[cluster_leader].vendor; + arm_linux_processors[i].uarch = arm_linux_processors[cluster_leader].uarch; + arm_linux_processors[i].max_frequency = arm_linux_processors[cluster_leader].max_frequency; + } + } + } + + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + cpuinfo_log_debug("post-analysis processor %"PRIu32": MIDR %08"PRIx32" frequency %"PRIu32, + i, arm_linux_processors[i].midr, arm_linux_processors[i].max_frequency); + } + } + + qsort(arm_linux_processors, arm_linux_processors_count, + sizeof(struct cpuinfo_arm_linux_processor), cmp_arm_linux_processor); + + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + cpuinfo_log_debug("post-sort processor %"PRIu32": system id %"PRIu32" MIDR %08"PRIx32" frequency %"PRIu32, + i, arm_linux_processors[i].system_processor_id, arm_linux_processors[i].midr, arm_linux_processors[i].max_frequency); + } + } + + uint32_t uarchs_count = 0; + enum cpuinfo_uarch last_uarch; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_count == 0 || arm_linux_processors[i].uarch != last_uarch) { + last_uarch = arm_linux_processors[i].uarch; + uarchs_count += 1; + } + arm_linux_processors[i].uarch_index = uarchs_count - 1; + } + } + + /* + * Assumptions: + * - No SMP (i.e. each core supports only one hardware thread). + * - Level 1 instruction and data caches are private to the core clusters. 
+ * - Level 2 and level 3 cache is shared between cores in the same cluster. + */ + cpuinfo_arm_chipset_to_string(&chipset, package.name); + package.processor_count = valid_processors; + package.core_count = valid_processors; + package.cluster_count = cluster_count; + + processors = calloc(valid_processors, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + valid_processors * sizeof(struct cpuinfo_processor), valid_processors); + goto cleanup; + } + + cores = calloc(valid_processors, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + valid_processors * sizeof(struct cpuinfo_core), valid_processors); + goto cleanup; + } + + clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + cluster_count * sizeof(struct cpuinfo_cluster), cluster_count); + goto cleanup; + } + + uarchs = calloc(uarchs_count, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" microarchitectures", + uarchs_count * sizeof(struct cpuinfo_uarch_info), uarchs_count); + goto cleanup; + } + + linux_cpu_to_processor_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_processor*)); + if (linux_cpu_to_processor_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" logical processor mapping entries", + arm_linux_processors_count * sizeof(struct cpuinfo_processor*), arm_linux_processors_count); + goto cleanup; + } + + linux_cpu_to_core_map = calloc(arm_linux_processors_count, sizeof(struct cpuinfo_core*)); + if (linux_cpu_to_core_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" core mapping entries", + arm_linux_processors_count * sizeof(struct cpuinfo_core*), arm_linux_processors_count); + goto cleanup; + } + + if (uarchs_count > 1) { + linux_cpu_to_uarch_index_map = calloc(arm_linux_processors_count, sizeof(uint32_t)); + if (linux_cpu_to_uarch_index_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for %"PRIu32" uarch index mapping entries", + arm_linux_processors_count * sizeof(uint32_t), arm_linux_processors_count); + goto cleanup; + } + } + + l1i = calloc(valid_processors, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + valid_processors * sizeof(struct cpuinfo_cache), valid_processors); + goto cleanup; + } + + l1d = calloc(valid_processors, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + valid_processors * sizeof(struct cpuinfo_cache), valid_processors); + goto cleanup; + } + + uint32_t uarchs_index = 0; + for (uint32_t i = 0; i < arm_linux_processors_count; i++) { + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + if (uarchs_index == 0 || arm_linux_processors[i].uarch != last_uarch) { + last_uarch = arm_linux_processors[i].uarch; + uarchs[uarchs_index] = (struct cpuinfo_uarch_info) { + .uarch = arm_linux_processors[i].uarch, + .midr = arm_linux_processors[i].midr, + }; + uarchs_index += 1; + } + uarchs[uarchs_index - 1].processor_count += 1; + uarchs[uarchs_index - 1].core_count += 1; + } + } + + 
uint32_t l2_count = 0, l3_count = 0, big_l3_size = 0, cluster_id = UINT32_MAX; + /* Indication whether L3 (if it exists) is shared between all cores */ + bool shared_l3 = true; + /* Populate cache infromation structures in l1i, l1d */ + for (uint32_t i = 0; i < valid_processors; i++) { + if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { + cluster_id += 1; + clusters[cluster_id] = (struct cpuinfo_cluster) { + .processor_start = i, + .processor_count = arm_linux_processors[i].package_processor_count, + .core_start = i, + .core_count = arm_linux_processors[i].package_processor_count, + .cluster_id = cluster_id, + .package = &package, + .vendor = arm_linux_processors[i].vendor, + .uarch = arm_linux_processors[i].uarch, + .midr = arm_linux_processors[i].midr, + }; + } + + processors[i].smt_id = 0; + processors[i].core = cores + i; + processors[i].cluster = clusters + cluster_id; + processors[i].package = &package; + processors[i].linux_id = (int) arm_linux_processors[i].system_processor_id; + processors[i].cache.l1i = l1i + i; + processors[i].cache.l1d = l1d + i; + linux_cpu_to_processor_map[arm_linux_processors[i].system_processor_id] = &processors[i]; + + cores[i].processor_start = i; + cores[i].processor_count = 1; + cores[i].core_id = i; + cores[i].cluster = clusters + cluster_id; + cores[i].package = &package; + cores[i].vendor = arm_linux_processors[i].vendor; + cores[i].uarch = arm_linux_processors[i].uarch; + cores[i].midr = arm_linux_processors[i].midr; + linux_cpu_to_core_map[arm_linux_processors[i].system_processor_id] = &cores[i]; + + if (linux_cpu_to_uarch_index_map != NULL) { + linux_cpu_to_uarch_index_map[arm_linux_processors[i].system_processor_id] = + arm_linux_processors[i].uarch_index; + } + + struct cpuinfo_cache temp_l2 = { 0 }, temp_l3 = { 0 }; + cpuinfo_arm_decode_cache( + arm_linux_processors[i].uarch, + arm_linux_processors[i].package_processor_count, + arm_linux_processors[i].midr, + &chipset, + cluster_id, + arm_linux_processors[i].architecture_version, + &l1i[i], &l1d[i], &temp_l2, &temp_l3); + l1i[i].processor_start = l1d[i].processor_start = i; + l1i[i].processor_count = l1d[i].processor_count = 1; + #if CPUINFO_ARCH_ARM + /* L1I reported in /proc/cpuinfo overrides defaults */ + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_ICACHE)) { + l1i[i] = (struct cpuinfo_cache) { + .size = arm_linux_processors[i].proc_cpuinfo_cache.i_size, + .associativity = arm_linux_processors[i].proc_cpuinfo_cache.i_assoc, + .sets = arm_linux_processors[i].proc_cpuinfo_cache.i_sets, + .partitions = 1, + .line_size = arm_linux_processors[i].proc_cpuinfo_cache.i_line_length + }; + } + /* L1D reported in /proc/cpuinfo overrides defaults */ + if (bitmask_all(arm_linux_processors[i].flags, CPUINFO_ARM_LINUX_VALID_DCACHE)) { + l1d[i] = (struct cpuinfo_cache) { + .size = arm_linux_processors[i].proc_cpuinfo_cache.d_size, + .associativity = arm_linux_processors[i].proc_cpuinfo_cache.d_assoc, + .sets = arm_linux_processors[i].proc_cpuinfo_cache.d_sets, + .partitions = 1, + .line_size = arm_linux_processors[i].proc_cpuinfo_cache.d_line_length + }; + } + #endif + + if (temp_l3.size != 0) { + /* + * Assumptions: + * - L2 is private to each core + * - L3 is shared by cores in the same cluster + * - If cores in different clusters report the same L3, it is shared between all cores. 
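+			 *
+			 * Worked example (illustrative 4+4 big.LITTLE configuration, not a specific device): if both cluster
+			 * leaders report an L3 of the same size, big_l3_size is recorded for the first cluster, l3_count stays
+			 * at 1, and shared_l3 remains true; if the second cluster leader reports a different L3 size, shared_l3
+			 * becomes false and a second L3 entry is counted.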
+ */ + l2_count += 1; + if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { + if (cluster_id == 0) { + big_l3_size = temp_l3.size; + l3_count = 1; + } else if (temp_l3.size != big_l3_size) { + /* If some cores have different L3 size, L3 is not shared between all cores */ + shared_l3 = false; + l3_count += 1; + } + } + } else { + /* If some cores don't have L3 cache, L3 is not shared between all cores */ + shared_l3 = false; + if (temp_l2.size != 0) { + /* Assume L2 is shared by cores in the same cluster */ + if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { + l2_count += 1; + } + } + } + } + + if (l2_count != 0) { + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + + if (l3_count != 0) { + l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + goto cleanup; + } + } + } + + cluster_id = UINT32_MAX; + uint32_t l2_index = UINT32_MAX, l3_index = UINT32_MAX; + for (uint32_t i = 0; i < valid_processors; i++) { + if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { + cluster_id++; + } + + struct cpuinfo_cache dummy_l1i, dummy_l1d, temp_l2 = { 0 }, temp_l3 = { 0 }; + cpuinfo_arm_decode_cache( + arm_linux_processors[i].uarch, + arm_linux_processors[i].package_processor_count, + arm_linux_processors[i].midr, + &chipset, + cluster_id, + arm_linux_processors[i].architecture_version, + &dummy_l1i, &dummy_l1d, &temp_l2, &temp_l3); + + if (temp_l3.size != 0) { + /* + * Assumptions: + * - L2 is private to each core + * - L3 is shared by cores in the same cluster + * - If cores in different clusters report the same L3, it is shared between all cores. + */ + l2_index += 1; + l2[l2_index] = (struct cpuinfo_cache) { + .size = temp_l2.size, + .associativity = temp_l2.associativity, + .sets = temp_l2.sets, + .partitions = 1, + .line_size = temp_l2.line_size, + .flags = temp_l2.flags, + .processor_start = i, + .processor_count = 1, + }; + processors[i].cache.l2 = l2 + l2_index; + if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { + l3_index += 1; + if (l3_index < l3_count) { + l3[l3_index] = (struct cpuinfo_cache) { + .size = temp_l3.size, + .associativity = temp_l3.associativity, + .sets = temp_l3.sets, + .partitions = 1, + .line_size = temp_l3.line_size, + .flags = temp_l3.flags, + .processor_start = i, + .processor_count = + shared_l3 ? 
valid_processors : arm_linux_processors[i].package_processor_count, + }; + } + } + if (shared_l3) { + processors[i].cache.l3 = l3; + } else if (l3_index < l3_count) { + processors[i].cache.l3 = l3 + l3_index; + } + } else if (temp_l2.size != 0) { + /* Assume L2 is shared by cores in the same cluster */ + if (arm_linux_processors[i].package_leader_id == arm_linux_processors[i].system_processor_id) { + l2_index += 1; + l2[l2_index] = (struct cpuinfo_cache) { + .size = temp_l2.size, + .associativity = temp_l2.associativity, + .sets = temp_l2.sets, + .partitions = 1, + .line_size = temp_l2.line_size, + .flags = temp_l2.flags, + .processor_start = i, + .processor_count = arm_linux_processors[i].package_processor_count, + }; + } + processors[i].cache.l2 = l2 + l2_index; + } + } + + /* Commit */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = &package; + cpuinfo_uarchs = uarchs; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; + + cpuinfo_processors_count = valid_processors; + cpuinfo_cores_count = valid_processors; + cpuinfo_clusters_count = cluster_count; + cpuinfo_packages_count = 1; + cpuinfo_uarchs_count = uarchs_count; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = valid_processors; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = valid_processors; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]); + + cpuinfo_linux_cpu_max = arm_linux_processors_count; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + cpuinfo_linux_cpu_to_uarch_index_map = linux_cpu_to_uarch_index_map; + + __sync_synchronize(); + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + uarchs = NULL; + l1i = l1d = l2 = l3 = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; + linux_cpu_to_uarch_index_map = NULL; + +cleanup: + free(arm_linux_processors); + free(processors); + free(cores); + free(clusters); + free(uarchs); + free(l1i); + free(l1d); + free(l2); + free(l3); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); + free(linux_cpu_to_uarch_index_map); +} diff --git a/dep/cpuinfo/src/arm/linux/midr.c b/dep/cpuinfo/src/arm/linux/midr.c new file mode 100644 index 000000000..2c3116b62 --- /dev/null +++ b/dep/cpuinfo/src/arm/linux/midr.c @@ -0,0 +1,863 @@ +#include +#include +#include +#include + +#include +#include +#if defined(__ANDROID__) + #include +#endif +#include +#include +#include +#include +#include +#include + + +#define CLUSTERS_MAX 3 + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + +/* Description of core clusters configuration in a chipset (identified by series and model number) */ +struct cluster_config { + /* Number of cores (logical processors) */ + uint8_t cores; + /* ARM chipset series (see cpuinfo_arm_chipset_series enum) */ + uint8_t series; + /* Chipset model number (see cpuinfo_arm_chipset struct) */ + uint16_t model; + /* Number of heterogenous clusters in the CPU package */ + uint8_t clusters; + /* + * Number of cores in each cluster: + # - Symmetric configurations: [0] = # cores + * - big.LITTLE configurations: [0] = # LITTLE cores, [1] = # big cores + 
* - Max.Med.Min configurations: [0] = # Min cores, [1] = # Med cores, [2] = # Max cores
+	 */
+	uint8_t cluster_cores[CLUSTERS_MAX];
+	/*
+	 * MIDR of cores in each cluster:
+	 * - Symmetric configurations: [0] = core MIDR
+	 * - big.LITTLE configurations: [0] = LITTLE core MIDR, [1] = big core MIDR
+	 * - Max.Med.Min configurations: [0] = Min core MIDR, [1] = Med core MIDR, [2] = Max core MIDR
+	 */
+	uint32_t cluster_midr[CLUSTERS_MAX];
+};
+
+/*
+ * The list of chipsets whose MIDR may not be unambiguously decoded, at least on some devices.
+ * The typical reasons why MIDRs cannot be decoded are buggy kernels, which either do not report all MIDR
+ * information (e.g. on ATM7029 the kernel doesn't report CPU Part), or chipsets with more than one type of cores
+ * (symmetric configurations like 4x Cortex-A53 + 4x Cortex-A53 do not count), where buggy kernels report MIDR
+ * information only for some cores in /proc/cpuinfo (either only online cores, or only the core that reads
+ * /proc/cpuinfo). On these kernels/chipsets, it is not possible to detect all core types by just parsing
+ * /proc/cpuinfo, so we use the chipset name and this table to find their MIDR (and thus microarchitecture, cache, etc).
+ *
+ * Note: not all chipsets with heterogeneous multiprocessing need an entry in this table. The following HMP
+ * chipsets always list information about all cores in /proc/cpuinfo:
+ *
+ * - Snapdragon 660
+ * - Snapdragon 820 (MSM8996)
+ * - Snapdragon 821 (MSM8996PRO)
+ * - Snapdragon 835 (MSM8998)
+ * - Exynos 8895
+ * - Kirin 960
+ *
+ * As these are all new processors, there is hope that this table won't grow uncontrollably over time.
+ */
+static const struct cluster_config cluster_configs[] = {
+#if CPUINFO_ARCH_ARM
+	{
+		/*
+		 * MSM8916 (Snapdragon 410): 4x Cortex-A53
+		 * Some AArch32 phones use non-standard /proc/cpuinfo format.
+		 */
+		.cores = 4,
+		.series = cpuinfo_arm_chipset_series_qualcomm_msm,
+		.model = UINT16_C(8916),
+		.clusters = 1,
+		.cluster_cores = {
+			[0] = 4,
+		},
+		.cluster_midr = {
+			[0] = UINT32_C(0x410FD030),
+		},
+	},
+	{
+		/*
+		 * MSM8939 (Snapdragon 615): 4x Cortex-A53 + 4x Cortex-A53
+		 * Some AArch32 phones use non-standard /proc/cpuinfo format.
+ */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = UINT16_C(8939), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD034), + }, + }, +#endif + { + /* MSM8956 (Snapdragon 650): 2x Cortex-A72 + 4x Cortex-A53 */ + .cores = 6, + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = UINT16_C(8956), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD080), + }, + }, + { + /* MSM8976/MSM8976PRO (Snapdragon 652/653): 4x Cortex-A72 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = UINT16_C(8976), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD080), + }, + }, + { + /* MSM8992 (Snapdragon 808): 2x Cortex-A57 + 4x Cortex-A53 */ + .cores = 6, + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = UINT16_C(8992), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD033), + [1] = UINT32_C(0x411FD072), + }, + }, + { + /* MSM8994/MSM8994V (Snapdragon 810): 4x Cortex-A57 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_qualcomm_msm, + .model = UINT16_C(8994), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD032), + [1] = UINT32_C(0x411FD071), + }, + }, +#if CPUINFO_ARCH_ARM + { + /* Exynos 5422: 4x Cortex-A15 + 4x Cortex-A7 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = UINT16_C(5422), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC073), + [1] = UINT32_C(0x412FC0F3), + }, + }, + { + /* Exynos 5430: 4x Cortex-A15 + 4x Cortex-A7 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = UINT16_C(5430), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC074), + [1] = UINT32_C(0x413FC0F3), + }, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* Exynos 5433: 4x Cortex-A57 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = UINT16_C(5433), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD031), + [1] = UINT32_C(0x411FD070), + }, + }, + { + /* Exynos 7420: 4x Cortex-A57 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = UINT16_C(7420), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD032), + [1] = UINT32_C(0x411FD070), + }, + }, + { + /* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_samsung_exynos, + .model = UINT16_C(8890), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x531F0011), + }, + }, +#if CPUINFO_ARCH_ARM + { + /* Kirin 920: 4x Cortex-A15 + 4x Cortex-A7 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = UINT16_C(920), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC075), + [1] = UINT32_C(0x413FC0F3), + }, + }, + { + /* Kirin 925: 4x Cortex-A15 + 4x Cortex-A7 */ + .cores = 8, + .series = 
cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = UINT16_C(925), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC075), + [1] = UINT32_C(0x413FC0F3), + }, + }, + { + /* Kirin 928: 4x Cortex-A15 + 4x Cortex-A7 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = UINT16_C(928), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC075), + [1] = UINT32_C(0x413FC0F3), + }, + }, +#endif /* CPUINFO_ARCH_ARM */ + { + /* Kirin 950: 4x Cortex-A72 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = UINT16_C(950), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD080), + }, + }, + { + /* Kirin 955: 4x Cortex-A72 + 4x Cortex-A53 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_hisilicon_kirin, + .model = UINT16_C(955), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD080), + }, + }, +#if CPUINFO_ARCH_ARM + { + /* MediaTek MT8135: 2x Cortex-A7 + 2x Cortex-A15 */ + .cores = 4, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(8135), + .clusters = 2, + .cluster_cores = { + [0] = 2, + [1] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC073), + [1] = UINT32_C(0x413FC0F2), + }, + }, +#endif + { + /* MediaTek MT8173: 2x Cortex-A72 + 2x Cortex-A53 */ + .cores = 4, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(8173), + .clusters = 2, + .cluster_cores = { + [0] = 2, + [1] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD032), + [1] = UINT32_C(0x410FD080), + }, + }, + { + /* MediaTek MT8176: 2x Cortex-A72 + 4x Cortex-A53 */ + .cores = 6, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(8176), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD032), + [1] = UINT32_C(0x410FD080), + }, + }, +#if CPUINFO_ARCH_ARM64 + { + /* + * MediaTek MT8735: 4x Cortex-A53 + * Some AArch64 phones use non-standard /proc/cpuinfo format. + */ + .cores = 4, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(8735), + .clusters = 1, + .cluster_cores = { + [0] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + }, + }, +#endif +#if CPUINFO_ARCH_ARM + { + /* + * MediaTek MT6592: 4x Cortex-A7 + 4x Cortex-A7 + * Some phones use non-standard /proc/cpuinfo format. 
+ */ + .cores = 4, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(6592), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC074), + [1] = UINT32_C(0x410FC074), + }, + }, + { + /* MediaTek MT6595: 4x Cortex-A17 + 4x Cortex-A7 */ + .cores = 8, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(6595), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC075), + [1] = UINT32_C(0x410FC0E0), + }, + }, +#endif + { + /* MediaTek MT6797: 2x Cortex-A72 + 4x Cortex-A53 + 4x Cortex-A53 */ + .cores = 10, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(6797), + .clusters = 3, + .cluster_cores = { + [0] = 4, + [1] = 4, + [2] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD034), + [2] = UINT32_C(0x410FD081), + }, + }, + { + /* MediaTek MT6799: 2x Cortex-A73 + 4x Cortex-A53 + 4x Cortex-A35 */ + .cores = 10, + .series = cpuinfo_arm_chipset_series_mediatek_mt, + .model = UINT16_C(6799), + .clusters = 3, + .cluster_cores = { + [0] = 4, + [1] = 4, + [2] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD041), + [1] = UINT32_C(0x410FD034), + [2] = UINT32_C(0x410FD092), + }, + }, + { + /* Rockchip RK3399: 2x Cortex-A72 + 4x Cortex-A53 */ + .cores = 6, + .series = cpuinfo_arm_chipset_series_rockchip_rk, + .model = UINT16_C(3399), + .clusters = 2, + .cluster_cores = { + [0] = 4, + [1] = 2, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FD034), + [1] = UINT32_C(0x410FD082), + }, + }, +#if CPUINFO_ARCH_ARM + { + /* Actions ATM8029: 4x Cortex-A5 + * Most devices use non-standard /proc/cpuinfo format. + */ + .cores = 4, + .series = cpuinfo_arm_chipset_series_actions_atm, + .model = UINT16_C(7029), + .clusters = 1, + .cluster_cores = { + [0] = 4, + }, + .cluster_midr = { + [0] = UINT32_C(0x410FC051), + }, + }, +#endif +}; + +/* + * Searches chipset name in mapping of chipset name to cores' MIDR values. If match is successful, initializes MIDR + * for all clusters' leaders with tabulated values. + * + * @param[in] chipset - chipset (SoC) name information. + * @param clusters_count - number of CPU core clusters detected in the SoC. + * @param cluster_leaders - indices of core clusters' leaders in the @p processors array. + * @param processors_count - number of usable logical processors in the system. + * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency, + * and decoded core cluster (package_leader_id) information. + * Upon successful return, processors[i].midr for all clusters' leaders contains the + * tabulated MIDR values. + * @param verify_midr - indicated whether the function should check that the MIDR values to be assigned to leaders of + * core clusters are consistent with known parts of their parsed values. + * Set if to false if the only MIDR value parsed from /proc/cpuinfo is for the last processor + * reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor. + * + * @retval true if the chipset was found in the mapping and core clusters' leaders initialized with MIDR values. + * @retval false if the chipset was not found in the mapping, or any consistency check failed. 
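+ *
+ * The consistency check compares only the MIDR fields that were actually parsed from /proc/cpuinfo. A minimal
+ * sketch of the idea (flags, parsed_midr, and tabulated_midr are illustrative placeholders, not API names):
+ *
+ *   uint32_t midr_mask = 0;
+ *   if (flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+ *       midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+ *   }
+ *   if (flags & CPUINFO_ARM_LINUX_VALID_PART) {
+ *       midr_mask |= CPUINFO_ARM_MIDR_PART_MASK;
+ *   }
+ *   bool consistent = ((parsed_midr ^ tabulated_midr) & midr_mask) == 0;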
+ */ +static bool cpuinfo_arm_linux_detect_cluster_midr_by_chipset( + const struct cpuinfo_arm_chipset chipset[restrict static 1], + uint32_t clusters_count, + const uint32_t cluster_leaders[restrict static CLUSTERS_MAX], + uint32_t processors_count, + struct cpuinfo_arm_linux_processor processors[restrict static processors_count], + bool verify_midr) +{ + if (clusters_count <= CLUSTERS_MAX) { + for (uint32_t c = 0; c < CPUINFO_COUNT_OF(cluster_configs); c++) { + if (cluster_configs[c].model == chipset->model && cluster_configs[c].series == chipset->series) { + /* Verify that the total number of cores and clusters of cores matches expectation */ + if (cluster_configs[c].cores != processors_count || cluster_configs[c].clusters != clusters_count) { + return false; + } + + /* Verify that core cluster configuration matches expectation */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + if (cluster_configs[c].cluster_cores[cluster] != processors[cluster_leader].package_processor_count) { + return false; + } + } + + if (verify_midr) { + /* Verify known parts of MIDR */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + + /* Create a mask of known midr bits */ + uint32_t midr_mask = 0; + if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) { + midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK; + } + if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) { + midr_mask |= CPUINFO_ARM_MIDR_VARIANT_MASK; + } + if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_PART) { + midr_mask |= CPUINFO_ARM_MIDR_PART_MASK; + } + if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_REVISION) { + midr_mask |= CPUINFO_ARM_MIDR_REVISION_MASK; + } + + /* Verify the bits under the mask */ + if ((processors[cluster_leader].midr ^ cluster_configs[c].cluster_midr[cluster]) & midr_mask) { + cpuinfo_log_debug("parsed MIDR of cluster %08"PRIu32" does not match tabulated value %08"PRIu32, + processors[cluster_leader].midr, cluster_configs[c].cluster_midr[cluster]); + return false; + } + } + } + + /* Assign MIDRs according to tabulated configurations */ + for (uint32_t cluster = 0; cluster < clusters_count; cluster++) { + const uint32_t cluster_leader = cluster_leaders[cluster]; + processors[cluster_leader].midr = cluster_configs[c].cluster_midr[cluster]; + processors[cluster_leader].flags |= CPUINFO_ARM_LINUX_VALID_MIDR; + cpuinfo_log_debug("cluster %"PRIu32" MIDR = 0x%08"PRIx32, cluster, cluster_configs[c].cluster_midr[cluster]); + } + return true; + } + } + } + return false; +} + +/* + * Initializes MIDR for leaders of core clusters using a heuristic for big.LITTLE systems: + * - If the only known MIDR is for the big core cluster, guess the matching MIDR for the LITTLE cluster. + * - Estimate which of the clusters is big using maximum frequency, if known, otherwise using system processor ID. + * - Initialize the MIDR for big and LITTLE core clusters using the guesstimates values. + * + * @param clusters_count - number of CPU core clusters detected in the SoC. + * @param cluster_with_midr_count - number of CPU core clusters in the SoC with known MIDR values. + * @param last_processor_with_midr - index of the last logical processor with known MIDR in the @p processors array. + * @param cluster_leaders - indices of core clusters' leaders in the @p processors array. 
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon successful return, processors[i].midr for all core clusters' leaders contains
+ *                             the heuristically detected MIDR value.
+ * @param verify_midr - indicates whether the function should check that the MIDR values to be assigned to leaders of
+ *                      core clusters are consistent with known parts of their parsed values.
+ *                      Set it to false if the only MIDR value parsed from /proc/cpuinfo is for the last processor
+ *                      reported in /proc/cpuinfo and thus can't be unambiguously attributed to that processor.
+ *
+ * @retval true if this is a big.LITTLE system with only one known MIDR and the CPU core clusters' leaders were
+ *         initialized with MIDR values.
+ * @retval false if this is not a big.LITTLE system.
+ */
+static bool cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
+	uint32_t clusters_count,
+	uint32_t cluster_with_midr_count,
+	uint32_t last_processor_with_midr,
+	const uint32_t cluster_leaders[restrict static CLUSTERS_MAX],
+	struct cpuinfo_arm_linux_processor processors[restrict static last_processor_with_midr],
+	bool verify_midr)
+{
+	if (clusters_count != 2 || cluster_with_midr_count != 1) {
+		/* Not a big.LITTLE system, or MIDR is known for both/neither clusters */
+		return false;
+	}
+
+	const uint32_t midr_flags =
+		(processors[processors[last_processor_with_midr].package_leader_id].flags & CPUINFO_ARM_LINUX_VALID_MIDR);
+	const uint32_t big_midr = processors[processors[last_processor_with_midr].package_leader_id].midr;
+	const uint32_t little_midr = midr_little_core_for_big(big_midr);
+
+	/* Default assumption: the first reported cluster is LITTLE cluster (this holds on most Linux kernels) */
+	uint32_t little_cluster_leader = cluster_leaders[0];
+	const uint32_t other_cluster_leader = cluster_leaders[1];
+	/* If maximum frequency is known for both clusters, assume LITTLE cluster is the one with lower frequency */
+	if (processors[little_cluster_leader].flags & processors[other_cluster_leader].flags & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+		if (processors[little_cluster_leader].max_frequency > processors[other_cluster_leader].max_frequency) {
+			little_cluster_leader = other_cluster_leader;
+		}
+	}
+
+	if (verify_midr) {
+		/* Verify known parts of MIDR */
+		for (uint32_t cluster = 0; cluster < clusters_count; cluster++) {
+			const uint32_t cluster_leader = cluster_leaders[cluster];
+
+			/* Create a mask of known midr bits */
+			uint32_t midr_mask = 0;
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_IMPLEMENTER) {
+				midr_mask |= CPUINFO_ARM_MIDR_IMPLEMENTER_MASK;
+			}
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_VARIANT) {
+				midr_mask |= CPUINFO_ARM_MIDR_VARIANT_MASK;
+			}
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_PART) {
+				midr_mask |= CPUINFO_ARM_MIDR_PART_MASK;
+			}
+			if (processors[cluster_leader].flags & CPUINFO_ARM_LINUX_VALID_REVISION) {
+				midr_mask |= CPUINFO_ARM_MIDR_REVISION_MASK;
+			}
+
+			/* Verify the bits under the mask */
+			const uint32_t midr = (cluster_leader == little_cluster_leader) ?
little_midr : big_midr;
+			if ((processors[cluster_leader].midr ^ midr) & midr_mask) {
+				cpuinfo_log_debug(
+					"parsed MIDR %08"PRIu32" of cluster leader %"PRIu32" is inconsistent with expected value %08"PRIu32,
+					processors[cluster_leader].midr, cluster_leader, midr);
+				return false;
+			}
+		}
+	}
+
+	for (uint32_t c = 0; c < clusters_count; c++) {
+		/* Skip cluster with already assigned MIDR */
+		const uint32_t cluster_leader = cluster_leaders[c];
+		if (bitmask_all(processors[cluster_leader].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+			continue;
+		}
+
+		const uint32_t midr = (cluster_leader == little_cluster_leader) ? little_midr : big_midr;
+		cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, cluster_leader, midr);
+		/* To be consistent, we copy the MIDR entirely, rather than by parts */
+		processors[cluster_leader].midr = midr;
+		processors[cluster_leader].flags |= midr_flags;
+	}
+	return true;
+}
+
+/*
+ * Initializes MIDR for leaders of core clusters in a single sequential scan:
+ * - Clusters preceding the first reported MIDR value are assumed to have the default MIDR value.
+ * - Clusters following any reported MIDR value are assumed to have that MIDR value.
+ *
+ * @param default_midr - MIDR value that will be assigned to cluster leaders preceding any reported MIDR value.
+ * @param processors_count - number of logical processor descriptions in the @p processors array.
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon successful return, processors[i].midr for all core clusters' leaders contains
+ *                             the assigned MIDR value.
+ */
+static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+	uint32_t default_midr,
+	uint32_t processors_count,
+	struct cpuinfo_arm_linux_processor processors[restrict static processors_count])
+{
+	uint32_t midr = default_midr;
+	for (uint32_t i = 0; i < processors_count; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (processors[i].package_leader_id == i) {
+				if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+					midr = processors[i].midr;
+				} else {
+					cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, i, midr);
+					/* To be consistent, we copy the MIDR entirely, rather than by parts */
+					processors[i].midr = midr;
+					processors[i].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Detects the MIDR of each CPU core cluster's leader.
+ *
+ * @param[in] chipset - chipset (SoC) name information.
+ * @param max_processors - number of processor descriptions in the @p processors array.
+ * @param usable_processors - number of processor descriptions in the @p processors array with both POSSIBLE and
+ *                            PRESENT flags.
+ * @param[in,out] processors - array of logical processor descriptions with pre-parsed MIDR, maximum frequency,
+ *                             and decoded core cluster (package_leader_id) information.
+ *                             Upon return, processors[i].midr for all clusters' leaders contains the MIDR value.
+ *
+ * @returns The number of core clusters
+ */
+uint32_t cpuinfo_arm_linux_detect_cluster_midr(
+	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	uint32_t max_processors,
+	uint32_t usable_processors,
+	struct cpuinfo_arm_linux_processor processors[restrict static max_processors])
+{
+	uint32_t clusters_count = 0;
+	uint32_t cluster_leaders[CLUSTERS_MAX];
+	uint32_t last_processor_in_cpuinfo = max_processors;
+	uint32_t last_processor_with_midr = max_processors;
+	uint32_t processors_with_midr_count = 0;
+	for (uint32_t i = 0; i < max_processors; i++) {
+		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+			if (processors[i].flags & CPUINFO_ARM_LINUX_VALID_PROCESSOR) {
+				last_processor_in_cpuinfo = i;
+			}
+			if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_IMPLEMENTER | CPUINFO_ARM_LINUX_VALID_PART)) {
+				last_processor_with_midr = i;
+				processors_with_midr_count += 1;
+			}
+			const uint32_t group_leader = processors[i].package_leader_id;
+			if (group_leader == i) {
+				if (clusters_count < CLUSTERS_MAX) {
+					cluster_leaders[clusters_count] = i;
+				}
+				clusters_count += 1;
+			} else {
+				/* Copy known bits of information to cluster leader */
+
+				if ((processors[i].flags & ~processors[group_leader].flags) & CPUINFO_LINUX_FLAG_MAX_FREQUENCY) {
+					processors[group_leader].max_frequency = processors[i].max_frequency;
+					processors[group_leader].flags |= CPUINFO_LINUX_FLAG_MAX_FREQUENCY;
+				}
+				if (!bitmask_all(processors[group_leader].flags, CPUINFO_ARM_LINUX_VALID_MIDR) &&
+					bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR))
+				{
+					processors[group_leader].midr = processors[i].midr;
+					processors[group_leader].flags |= CPUINFO_ARM_LINUX_VALID_MIDR;
+				}
+			}
+		}
+	}
+	cpuinfo_log_debug("detected %"PRIu32" core clusters", clusters_count);
+
+	/*
+	 * Two relationships between the reported /proc/cpuinfo information and the cores are possible:
+	 * - /proc/cpuinfo reports information for all or some of the cores below the corresponding
+	 *   "processor : " lines. Information on offline cores may be missing.
+	 * - /proc/cpuinfo reports information only once, after all "processor : " lines.
+	 *   The reported information may relate to processor #0 or to the processor which
+	 *   executed the system calls to read /proc/cpuinfo. It is also indistinguishable
+	 *   from /proc/cpuinfo reporting information only for the last core (e.g. if all other
+	 *   cores are offline).
+	 *
+	 * We detect the second case by checking if /proc/cpuinfo contains valid MIDR only for one,
+	 * last reported, processor. Note that the last reported core may not be the last
+	 * present & possible processor, as /proc/cpuinfo may not report high-index offline cores.
+	 */
+	if (processors_with_midr_count == 1 && last_processor_in_cpuinfo == last_processor_with_midr && clusters_count > 1) {
+		/*
+		 * There are multiple core clusters, but /proc/cpuinfo reported MIDR only for one
+		 * processor, and we don't even know which logical processor this information refers to.
+		 *
+		 * We make three attempts to detect MIDR for all clusters:
+		 * 1. Search tabulated MIDR values for chipsets which have heterogeneous clusters and ship with Linux
+		 *    kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values.
+		 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
+		 *    and estimate MIDR for the other cluster under the assumption that the MIDR for the big cluster is known.
+		 * 3. Initialize MIDRs for all core clusters to the only parsed MIDR value.
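+		 *
+		 * For illustration (hypothetical scenario, not observed behaviour of a specific device): on an MSM8956
+		 * (Snapdragon 650) phone whose kernel reports the MIDR fields only once, after all "processor : " lines,
+		 * attempt 1 succeeds: the chipset matches the tabulated 2-cluster configuration above (4 cores with
+		 * MIDR 0x410FD034 + 2 cores with MIDR 0x410FD080), so both cluster leaders receive their MIDR from the
+		 * table even though /proc/cpuinfo identified only one core type.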
+		 */
+		cpuinfo_log_debug("the only reported MIDR cannot be attributed to a particular processor");
+
+		if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
+			chipset, clusters_count, cluster_leaders, usable_processors, processors, false))
+		{
+			return clusters_count;
+		}
+
+		/* Try big.LITTLE heuristic */
+		if (cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic(
+			clusters_count, 1, last_processor_with_midr,
+			cluster_leaders, processors, false))
+		{
+			return clusters_count;
+		}
+
+		/* Fall back to sequential initialization of MIDR values for core clusters */
+		cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+			processors[processors[last_processor_with_midr].package_leader_id].midr,
+			max_processors, processors);
+	} else if (processors_with_midr_count < usable_processors) {
+		/*
+		 * /proc/cpuinfo reported MIDR only for some processors, and probably some core clusters do not have MIDR
+		 * for any of the cores. Check if this is the case.
+		 */
+		uint32_t clusters_with_midr_count = 0;
+		for (uint32_t i = 0; i < max_processors; i++) {
+			if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID | CPUINFO_ARM_LINUX_VALID_MIDR)) {
+				if (processors[i].package_leader_id == i) {
+					clusters_with_midr_count += 1;
+				}
+			}
+		}
+
+		if (clusters_with_midr_count < clusters_count) {
+			/*
+			 * /proc/cpuinfo reported MIDR only for some clusters; we need to reconstruct the others.
+			 * We make three attempts to detect MIDR for clusters without it:
+			 * 1. Search tabulated MIDR values for chipsets which have heterogeneous clusters and ship with Linux
+			 *    kernels which do not always report all cores in /proc/cpuinfo. If found, use the tabulated values.
+			 * 2. For systems with 2 clusters and MIDR known for one cluster, assume big.LITTLE configuration,
+			 *    and estimate MIDR for the other cluster under the assumption that the MIDR for the big cluster is known.
+			 * 3. Initialize MIDRs for core clusters in a single sequential scan:
+			 *    - Clusters preceding the first reported MIDR value are assumed to have the last reported MIDR value.
+			 *    - Clusters following any reported MIDR value are assumed to have that MIDR value.
+ */ + + if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset( + chipset, clusters_count, cluster_leaders, usable_processors, processors, true)) + { + return clusters_count; + } + + if (last_processor_with_midr != max_processors) { + /* Try big.LITTLE heuristic */ + if (cpuinfo_arm_linux_detect_cluster_midr_by_big_little_heuristic( + clusters_count, processors_with_midr_count, last_processor_with_midr, + cluster_leaders, processors, true)) + { + return clusters_count; + } + + /* Fall back to sequential initialization of MIDR values for core clusters */ + cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan( + processors[processors[last_processor_with_midr].package_leader_id].midr, + max_processors, processors); + } + } + } + return clusters_count; +} diff --git a/dep/cpuinfo/src/arm/mach/init.c b/dep/cpuinfo/src/arm/mach/init.c new file mode 100644 index 000000000..dbea578c4 --- /dev/null +++ b/dep/cpuinfo/src/arm/mach/init.c @@ -0,0 +1,619 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Polyfill recent CPUFAMILY_ARM_* values for older SDKs */ +#ifndef CPUFAMILY_ARM_MONSOON_MISTRAL + #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xE81E7EF6 +#endif +#ifndef CPUFAMILY_ARM_VORTEX_TEMPEST + #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07D34B9F +#endif +#ifndef CPUFAMILY_ARM_LIGHTNING_THUNDER + #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504D2 +#endif +#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM + #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3 +#endif + +struct cpuinfo_arm_isa cpuinfo_isa = { +#if CPUINFO_ARCH_ARM + .thumb = true, + .thumb2 = true, + .thumbee = false, + .jazelle = false, + .armv5e = true, + .armv6 = true, + .armv6k = true, + .armv7 = true, + .vfpv2 = false, + .vfpv3 = true, + .d32 = true, + .wmmx = false, + .wmmx2 = false, + .neon = true, +#endif +#if CPUINFO_ARCH_ARM64 + .aes = true, + .sha1 = true, + .sha2 = true, + .pmull = true, + .crc32 = true, +#endif +}; + +static uint32_t get_sys_info(int type_specifier, const char* name) { + size_t size = 0; + uint32_t result = 0; + int mib[2] = { CTL_HW, type_specifier }; + if (sysctl(mib, 2, NULL, &size, NULL, 0) != 0) { + cpuinfo_log_info("sysctl(\"%s\") failed: %s", name, strerror(errno)); + } else if (size == sizeof(uint32_t)) { + sysctl(mib, 2, &result, &size, NULL, 0); + cpuinfo_log_debug("%s: %"PRIu32 ", size = %lu", name, result, size); + } else { + cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", name); + } + return result; +} + +static uint32_t get_sys_info_by_name(const char* type_specifier) { + size_t size = 0; + uint32_t result = 0; + if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) { + cpuinfo_log_info("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno)); + } else if (size == sizeof(uint32_t)) { + sysctlbyname(type_specifier, &result, &size, NULL, 0); + cpuinfo_log_debug("%s: %"PRIu32 ", size = %lu", type_specifier, result, size); + } else { + cpuinfo_log_info("sysctl does not support non-integer lookup for (\"%s\")", type_specifier); + } + return result; +} + +static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) { + switch (cpu_family) { + case CPUFAMILY_ARM_SWIFT: + return cpuinfo_uarch_swift; + case CPUFAMILY_ARM_CYCLONE: + return cpuinfo_uarch_cyclone; + case CPUFAMILY_ARM_TYPHOON: + return cpuinfo_uarch_typhoon; + case CPUFAMILY_ARM_TWISTER: + return cpuinfo_uarch_twister; + case CPUFAMILY_ARM_HURRICANE: + 
return cpuinfo_uarch_hurricane; + case CPUFAMILY_ARM_MONSOON_MISTRAL: + /* 2x Monsoon + 4x Mistral cores */ + return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral; + case CPUFAMILY_ARM_VORTEX_TEMPEST: + /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */ + return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest; + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + /* Hexa-core: 2x Lightning + 4x Thunder; Octa-core (presumed): 4x Lightning + 4x Thunder */ + return core_index + 4 < core_count ? cpuinfo_uarch_lightning : cpuinfo_uarch_thunder; + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + /* Hexa-core: 2x Firestorm + 4x Icestorm; Octa-core: 4x Firestorm + 4x Icestorm */ + return core_index + 4 < core_count ? cpuinfo_uarch_firestorm : cpuinfo_uarch_icestorm; + default: + /* Use hw.cpusubtype for detection */ + break; + } + + #if CPUINFO_ARCH_ARM + switch (cpu_subtype) { + case CPU_SUBTYPE_ARM_V7: + return cpuinfo_uarch_cortex_a8; + case CPU_SUBTYPE_ARM_V7F: + return cpuinfo_uarch_cortex_a9; + case CPU_SUBTYPE_ARM_V7K: + return cpuinfo_uarch_cortex_a7; + default: + return cpuinfo_uarch_unknown; + } + #else + return cpuinfo_uarch_unknown; + #endif +} + +static void decode_package_name(char* package_name) { + size_t size; + if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) { + cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno)); + return; + } + + char *machine_name = alloca(size); + if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) { + cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno)); + return; + } + cpuinfo_log_debug("hw.machine: %s", machine_name); + + char name[10]; + uint32_t major = 0, minor = 0; + if (sscanf(machine_name, "%9[^,0123456789]%"SCNu32",%"SCNu32, name, &major, &minor) != 3) { + cpuinfo_log_warning("parsing \"hw.machine\" failed: %s", strerror(errno)); + return; + } + + uint32_t chip_model = 0; + char suffix = '\0'; + if (strcmp(name, "iPhone") == 0) { + /* + * iPhone 4 and up are supported: + * - iPhone 4 [A4]: iPhone3,1, iPhone3,2, iPhone3,3 + * - iPhone 4S [A5]: iPhone4,1 + * - iPhone 5 [A6]: iPhone5,1, iPhone5,2 + * - iPhone 5c [A6]: iPhone5,3, iPhone5,4 + * - iPhone 5s [A7]: iPhone6,1, iPhone6,2 + * - iPhone 6 [A8]: iPhone7,2 + * - iPhone 6 Plus [A8]: iPhone7,1 + * - iPhone 6s [A9]: iPhone8,1 + * - iPhone 6s Plus [A9]: iPhone8,2 + * - iPhone SE [A9]: iPhone8,4 + * - iPhone 7 [A10]: iPhone9,1, iPhone9,3 + * - iPhone 7 Plus [A10]: iPhone9,2, iPhone9,4 + * - iPhone 8 [A11]: iPhone10,1, iPhone10,4 + * - iPhone 8 Plus [A11]: iPhone10,2, iPhone10,5 + * - iPhone X [A11]: iPhone10,3, iPhone10,6 + * - iPhone XS [A12]: iPhone11,2, + * - iPhone XS Max [A12]: iPhone11,4, iPhone11,6 + * - iPhone XR [A12]: iPhone11,8 + */ + chip_model = major + 1; + } else if (strcmp(name, "iPad") == 0) { + switch (major) { + /* iPad 2 and up are supported */ + case 2: + /* + * iPad 2 [A5]: iPad2,1, iPad2,2, iPad2,3, iPad2,4 + * iPad mini [A5]: iPad2,5, iPad2,6, iPad2,7 + */ + chip_model = major + 3; + break; + case 3: + /* + * iPad 3rd Gen [A5X]: iPad3,1, iPad3,2, iPad3,3 + * iPad 4th Gen [A6X]: iPad3,4, iPad3,5, iPad3,6 + */ + chip_model = (minor <= 3) ? 
5 : 6; + suffix = 'X'; + break; + case 4: + /* + * iPad Air [A7]: iPad4,1, iPad4,2, iPad4,3 + * iPad mini Retina [A7]: iPad4,4, iPad4,5, iPad4,6 + * iPad mini 3 [A7]: iPad4,7, iPad4,8, iPad4,9 + */ + chip_model = major + 3; + break; + case 5: + /* + * iPad mini 4 [A8]: iPad5,1, iPad5,2 + * iPad Air 2 [A8X]: iPad5,3, iPad5,4 + */ + chip_model = major + 3; + suffix = (minor <= 2) ? '\0' : 'X'; + break; + case 6: + /* + * iPad Pro 9.7" [A9X]: iPad6,3, iPad6,4 + * iPad Pro [A9X]: iPad6,7, iPad6,8 + * iPad 5th Gen [A9]: iPad6,11, iPad6,12 + */ + chip_model = major + 3; + suffix = minor <= 8 ? 'X' : '\0'; + break; + case 7: + /* + * iPad Pro 12.9" [A10X]: iPad7,1, iPad7,2 + * iPad Pro 10.5" [A10X]: iPad7,3, iPad7,4 + * iPad 6th Gen [A10]: iPad7,5, iPad7,6 + */ + chip_model = major + 3; + suffix = minor <= 4 ? 'X' : '\0'; + break; + default: + cpuinfo_log_info("unknown iPad: %s", machine_name); + break; + } + } else if (strcmp(name, "iPod") == 0) { + switch (major) { + case 5: + chip_model = 5; + break; + /* iPod touch (5th Gen) [A5]: iPod5,1 */ + case 7: + /* iPod touch (6th Gen, 2015) [A8]: iPod7,1 */ + chip_model = 8; + break; + default: + cpuinfo_log_info("unknown iPod: %s", machine_name); + break; + } + } else { + cpuinfo_log_info("unknown device: %s", machine_name); + } + if (chip_model != 0) { + snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%"PRIu32"%c", chip_model, suffix); + } +} + +void cpuinfo_arm_mach_init(void) { + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_package* packages = NULL; + struct cpuinfo_uarch_info* uarchs = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + + struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology(); + processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads); + goto cleanup; + } + cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores); + goto cleanup; + } + packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" packages", + mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages); + goto cleanup; + } + + const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores; + const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages; + const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages; + + for (uint32_t i = 0; i < mach_topology.packages; i++) { + packages[i] = (struct cpuinfo_package) { + .processor_start = i * threads_per_package, + .processor_count = threads_per_package, + .core_start = i * cores_per_package, + .core_count = cores_per_package, + }; + decode_package_name(packages[i].name); + } + + + const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); + const uint32_t cpu_type = get_sys_info_by_name("hw.cputype"); + const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype"); + 
switch (cpu_type) { + case CPU_TYPE_ARM64: + cpuinfo_isa.aes = true; + cpuinfo_isa.sha1 = true; + cpuinfo_isa.sha2 = true; + cpuinfo_isa.pmull = true; + cpuinfo_isa.crc32 = true; + break; +#if CPUINFO_ARCH_ARM + case CPU_TYPE_ARM: + switch (cpu_subtype) { + case CPU_SUBTYPE_ARM_V8: + cpuinfo_isa.armv8 = true; + cpuinfo_isa.aes = true; + cpuinfo_isa.sha1 = true; + cpuinfo_isa.sha2 = true; + cpuinfo_isa.pmull = true; + cpuinfo_isa.crc32 = true; + /* Fall-through to add ARMv7S features */ + case CPU_SUBTYPE_ARM_V7S: + case CPU_SUBTYPE_ARM_V7K: + cpuinfo_isa.fma = true; + /* Fall-through to add ARMv7F features */ + case CPU_SUBTYPE_ARM_V7F: + cpuinfo_isa.armv7mp = true; + cpuinfo_isa.fp16 = true; + /* Fall-through to add ARMv7 features */ + case CPU_SUBTYPE_ARM_V7: + break; + default: + break; + } + break; +#endif + } + /* + * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via + * sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments + * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf), + * but on new iOS versions these calls just fail with EPERM. + * + * Thus, we whitelist CPUs known to support these instructions. + */ + switch (cpu_family) { + case CPUFAMILY_ARM_MONSOON_MISTRAL: + case CPUFAMILY_ARM_VORTEX_TEMPEST: + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + #if CPUINFO_ARCH_ARM64 + cpuinfo_isa.atomics = true; + #endif + cpuinfo_isa.fp16arith = true; + } + + /* + * There does not yet seem to exist an OS mechanism to detect support for + * ARMv8.2 optional dot-product instructions, so we currently whitelist CPUs + * known to support these instruction. + */ + switch (cpu_family) { + case CPUFAMILY_ARM_LIGHTNING_THUNDER: + case CPUFAMILY_ARM_FIRESTORM_ICESTORM: + cpuinfo_isa.dot = true; + } + + uint32_t num_clusters = 1; + for (uint32_t i = 0; i < mach_topology.cores; i++) { + cores[i] = (struct cpuinfo_core) { + .processor_start = i * threads_per_core, + .processor_count = threads_per_core, + .core_id = i % cores_per_package, + .package = packages + i / cores_per_package, + .vendor = cpuinfo_vendor_apple, + .uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores), + }; + if (i != 0 && cores[i].uarch != cores[i - 1].uarch) { + num_clusters++; + } + } + for (uint32_t i = 0; i < mach_topology.threads; i++) { + const uint32_t smt_id = i % threads_per_core; + const uint32_t core_id = i / threads_per_core; + const uint32_t package_id = i / threads_per_package; + + processors[i].smt_id = smt_id; + processors[i].core = &cores[core_id]; + processors[i].package = &packages[package_id]; + } + + clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" clusters", + num_clusters * sizeof(struct cpuinfo_cluster), num_clusters); + goto cleanup; + } + uarchs = calloc(num_clusters, sizeof(struct cpuinfo_uarch_info)); + if (uarchs == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs", + num_clusters * sizeof(enum cpuinfo_uarch), num_clusters); + goto cleanup; + } + uint32_t cluster_idx = UINT32_MAX; + for (uint32_t i = 0; i < mach_topology.cores; i++) { + if (i == 0 || cores[i].uarch != cores[i - 1].uarch) { + cluster_idx++; + uarchs[cluster_idx] = (struct cpuinfo_uarch_info) { + .uarch = cores[i].uarch, + .processor_count = 1, + .core_count = 1, + }; + 
clusters[cluster_idx] = (struct cpuinfo_cluster) { + .processor_start = i * threads_per_core, + .processor_count = 1, + .core_start = i, + .core_count = 1, + .cluster_id = cluster_idx, + .package = cores[i].package, + .vendor = cores[i].vendor, + .uarch = cores[i].uarch, + }; + } else { + uarchs[cluster_idx].processor_count++; + uarchs[cluster_idx].core_count++; + clusters[cluster_idx].processor_count++; + clusters[cluster_idx].core_count++; + } + cores[i].cluster = &clusters[cluster_idx]; + } + + for (uint32_t i = 0; i < mach_topology.threads; i++) { + const uint32_t core_id = i / threads_per_core; + processors[i].cluster = cores[core_id].cluster; + } + + for (uint32_t i = 0; i < mach_topology.packages; i++) { + packages[i].cluster_start = 0; + packages[i].cluster_count = num_clusters; + } + + const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE"); + const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE"); + const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE"); + const uint32_t l2_cache_size = get_sys_info(HW_L2CACHESIZE, "HW_L2CACHESIZE"); + const uint32_t l3_cache_size = get_sys_info(HW_L3CACHESIZE, "HW_L3CACHESIZE"); + const uint32_t l1_cache_associativity = 4; + const uint32_t l2_cache_associativity = 8; + const uint32_t l3_cache_associativity = 16; + const uint32_t cache_partitions = 1; + const uint32_t cache_flags = 0; + + uint32_t threads_per_l1 = 0, l1_count = 0; + if (l1i_cache_size != 0 || l1d_cache_size != 0) { + /* Assume L1 caches are private to each core */ + threads_per_l1 = 1; + l1_count = mach_topology.threads / threads_per_l1; + cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count); + } + + uint32_t threads_per_l2 = 0, l2_count = 0; + if (l2_cache_size != 0) { + /* Assume L2 cache is shared between all cores */ + threads_per_l2 = mach_topology.cores; + l2_count = 1; + cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count); + } + + uint32_t threads_per_l3 = 0, l3_count = 0; + if (l3_cache_size != 0) { + /* Assume L3 cache is shared between all cores */ + threads_per_l3 = mach_topology.cores; + l3_count = 1; + cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count); + } + + if (l1i_cache_size != 0) { + l1i = calloc(l1_count, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + l1_count * sizeof(struct cpuinfo_cache), l1_count); + goto cleanup; + } + for (uint32_t c = 0; c < l1_count; c++) { + l1i[c] = (struct cpuinfo_cache) { + .size = l1i_cache_size, + .associativity = l1_cache_associativity, + .sets = l1i_cache_size / (l1_cache_associativity * cacheline_size), + .partitions = cache_partitions, + .line_size = cacheline_size, + .flags = cache_flags, + .processor_start = c * threads_per_l1, + .processor_count = threads_per_l1, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l1i = &l1i[t / threads_per_l1]; + } + } + + if (l1d_cache_size != 0) { + l1d = calloc(l1_count, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + l1_count * sizeof(struct cpuinfo_cache), l1_count); + goto cleanup; + } + for (uint32_t c = 0; c < l1_count; c++) { + l1d[c] = (struct cpuinfo_cache) { + .size = l1d_cache_size, + .associativity = l1_cache_associativity, + .sets = l1d_cache_size / (l1_cache_associativity * cacheline_size), + .partitions = cache_partitions, + .line_size = 
cacheline_size, + .flags = cache_flags, + .processor_start = c * threads_per_l1, + .processor_count = threads_per_l1, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l1d = &l1d[t / threads_per_l1]; + } + } + + if (l2_count != 0) { + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + for (uint32_t c = 0; c < l2_count; c++) { + l2[c] = (struct cpuinfo_cache) { + .size = l2_cache_size, + .associativity = l2_cache_associativity, + .sets = l2_cache_size / (l2_cache_associativity * cacheline_size), + .partitions = cache_partitions, + .line_size = cacheline_size, + .flags = cache_flags, + .processor_start = c * threads_per_l2, + .processor_count = threads_per_l2, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l2 = &l2[0]; + } + } + + if (l3_count != 0) { + l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + goto cleanup; + } + for (uint32_t c = 0; c < l3_count; c++) { + l3[c] = (struct cpuinfo_cache) { + .size = l3_cache_size, + .associativity = l3_cache_associativity, + .sets = l3_cache_size / (l3_cache_associativity * cacheline_size), + .partitions = cache_partitions, + .line_size = cacheline_size, + .flags = cache_flags, + .processor_start = c * threads_per_l3, + .processor_count = threads_per_l3, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l3 = &l3[0]; + } + } + + /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = packages; + cpuinfo_uarchs = uarchs; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; + + cpuinfo_processors_count = mach_topology.threads; + cpuinfo_cores_count = mach_topology.cores; + cpuinfo_clusters_count = num_clusters; + cpuinfo_packages_count = mach_topology.packages; + cpuinfo_uarchs_count = num_clusters; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + + __sync_synchronize(); + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + packages = NULL; + uarchs = NULL; + l1i = l1d = l2 = l3 = NULL; + +cleanup: + free(processors); + free(cores); + free(clusters); + free(packages); + free(uarchs); + free(l1i); + free(l1d); + free(l2); + free(l3); +} diff --git a/dep/cpuinfo/src/arm/midr.h b/dep/cpuinfo/src/arm/midr.h new file mode 100644 index 000000000..739dc1906 --- /dev/null +++ b/dep/cpuinfo/src/arm/midr.h @@ -0,0 +1,257 @@ +#pragma once +#include + + +#define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000) +#define CPUINFO_ARM_MIDR_VARIANT_MASK UINT32_C(0x00F00000) +#define CPUINFO_ARM_MIDR_ARCHITECTURE_MASK UINT32_C(0x000F0000) +#define CPUINFO_ARM_MIDR_PART_MASK UINT32_C(0x0000FFF0) +#define CPUINFO_ARM_MIDR_REVISION_MASK UINT32_C(0x0000000F) + +#define 
CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET 24 +#define CPUINFO_ARM_MIDR_VARIANT_OFFSET 20 +#define CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET 16 +#define CPUINFO_ARM_MIDR_PART_OFFSET 4 +#define CPUINFO_ARM_MIDR_REVISION_OFFSET 0 + +#define CPUINFO_ARM_MIDR_ARM1156 UINT32_C(0x410FB560) +#define CPUINFO_ARM_MIDR_CORTEX_A7 UINT32_C(0x410FC070) +#define CPUINFO_ARM_MIDR_CORTEX_A9 UINT32_C(0x410FC090) +#define CPUINFO_ARM_MIDR_CORTEX_A15 UINT32_C(0x410FC0F0) +#define CPUINFO_ARM_MIDR_CORTEX_A17 UINT32_C(0x410FC0E0) +#define CPUINFO_ARM_MIDR_CORTEX_A35 UINT32_C(0x410FD040) +#define CPUINFO_ARM_MIDR_CORTEX_A53 UINT32_C(0x410FD030) +#define CPUINFO_ARM_MIDR_CORTEX_A55 UINT32_C(0x410FD050) +#define CPUINFO_ARM_MIDR_CORTEX_A57 UINT32_C(0x410FD070) +#define CPUINFO_ARM_MIDR_CORTEX_A72 UINT32_C(0x410FD080) +#define CPUINFO_ARM_MIDR_CORTEX_A73 UINT32_C(0x410FD090) +#define CPUINFO_ARM_MIDR_CORTEX_A75 UINT32_C(0x410FD0A0) +#define CPUINFO_ARM_MIDR_KRYO280_GOLD UINT32_C(0x51AF8001) +#define CPUINFO_ARM_MIDR_KRYO280_SILVER UINT32_C(0x51AF8014) +#define CPUINFO_ARM_MIDR_KRYO385_GOLD UINT32_C(0x518F802D) +#define CPUINFO_ARM_MIDR_KRYO385_SILVER UINT32_C(0x518F803C) +#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010) +#define CPUINFO_ARM_MIDR_KRYO_GOLD UINT32_C(0x510F2050) +#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110) +#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010) +#define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030) + +inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) { + return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | + ((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK); +} + +inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) { + return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) | + ((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK); +} + +inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) { + return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) | + ((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK); +} + +inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) { + return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | + ((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK); +} + +inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) { + return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) | + ((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK); +} + +inline static uint32_t midr_get_variant(uint32_t midr) { + return (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) >> CPUINFO_ARM_MIDR_VARIANT_OFFSET; +} + +inline static uint32_t midr_get_implementer(uint32_t midr) { + return (midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) >> CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET; +} + +inline static uint32_t midr_get_part(uint32_t midr) { + return (midr & CPUINFO_ARM_MIDR_PART_MASK) >> CPUINFO_ARM_MIDR_PART_OFFSET; +} + +inline static uint32_t midr_get_revision(uint32_t midr) { + return (midr & CPUINFO_ARM_MIDR_REVISION_MASK) >> CPUINFO_ARM_MIDR_REVISION_OFFSET; +} + +inline static uint32_t midr_copy_implementer(uint32_t midr, uint32_t other_midr) { + return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | (other_midr & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK); +} + +inline static uint32_t midr_copy_variant(uint32_t midr, uint32_t other_midr) { + return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) | (other_midr & CPUINFO_ARM_MIDR_VARIANT_MASK); +} + +inline static 
uint32_t midr_copy_architecture(uint32_t midr, uint32_t other_midr) { + return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) | (other_midr & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK); +} + +inline static uint32_t midr_copy_part(uint32_t midr, uint32_t other_midr) { + return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | (other_midr & CPUINFO_ARM_MIDR_PART_MASK); +} + +inline static uint32_t midr_copy_revision(uint32_t midr, uint32_t other_midr) { + return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) | (other_midr & CPUINFO_ARM_MIDR_REVISION_MASK); +} + +inline static bool midr_is_arm1156(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_ARM1156 & uarch_mask); +} + +inline static bool midr_is_arm11(uint32_t midr) { + return (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | 0x0000F000)) == UINT32_C(0x4100B000); +} + +inline static bool midr_is_cortex_a9(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_CORTEX_A9 & uarch_mask); +} + +inline static bool midr_is_scorpion(uint32_t midr) { + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x510000F0): + case UINT32_C(0x510002D0): + return true; + default: + return false; + } +} + +inline static bool midr_is_krait(uint32_t midr) { + switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case UINT32_C(0x510004D0): + case UINT32_C(0x510006F0): + return true; + default: + return false; + } +} + +inline static bool midr_is_cortex_a53(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_CORTEX_A53 & uarch_mask); +} + +inline static bool midr_is_qualcomm_cortex_a53_silver(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO280_SILVER & uarch_mask); +} + +inline static bool midr_is_qualcomm_cortex_a55_silver(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO385_SILVER & uarch_mask); +} + +inline static bool midr_is_kryo280_gold(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO280_GOLD & uarch_mask); +} + +inline static bool midr_is_kryo_silver(uint32_t midr) { + const uint32_t uarch_mask = + CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK; + switch (midr & uarch_mask) { + case CPUINFO_ARM_MIDR_KRYO_SILVER_820: + case CPUINFO_ARM_MIDR_KRYO_SILVER_821: + return true; + default: + return false; + } +} + +inline static bool midr_is_kryo_gold(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO_GOLD & uarch_mask); +} + +inline static uint32_t midr_score_core(uint32_t midr) { + const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + switch (midr & core_mask) { + case UINT32_C(0x53000030): /* Exynos M4 */ + case UINT32_C(0x53000040): /* Exynos M5 */ + case UINT32_C(0x4100D440): /* Cortex-X1 */ + /* These cores are in big role w.r.t 
Cortex-A75/-A76/-A77/-A78 */ + return 6; + case UINT32_C(0x4E000030): /* Denver 2 */ + case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */ + case UINT32_C(0x53000020): /* Exynos M3 */ + case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */ + case UINT32_C(0x51008020): /* Kryo 385 Gold */ + case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */ + case UINT32_C(0x51002050): /* Kryo Gold */ + case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */ + case UINT32_C(0x4100D410): /* Cortex-A78 */ + case UINT32_C(0x4100D0D0): /* Cortex-A77 */ + case UINT32_C(0x4100D0E0): /* Cortex-A76AE */ + case UINT32_C(0x4100D0B0): /* Cortex-A76 */ + case UINT32_C(0x4100D0A0): /* Cortex-A75 */ + case UINT32_C(0x4100D090): /* Cortex-A73 */ + case UINT32_C(0x4100D080): /* Cortex-A72 */ +#if CPUINFO_ARCH_ARM + case UINT32_C(0x4100C0F0): /* Cortex-A15 */ + case UINT32_C(0x4100C0E0): /* Cortex-A17 */ + case UINT32_C(0x4100C0D0): /* Rockchip RK3288 cores */ + case UINT32_C(0x4100C0C0): /* Cortex-A12 */ +#endif /* CPUINFO_ARCH_ARM */ + /* These cores are always in big role */ + return 5; + case UINT32_C(0x4100D070): /* Cortex-A57 */ + /* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */ + return 4; +#if CPUINFO_ARCH_ARM64 + case UINT32_C(0x4100D060): /* Cortex-A65 */ +#endif /* CPUINFO_ARCH_ARM64 */ + case UINT32_C(0x4100D050): /* Cortex-A55 */ + case UINT32_C(0x4100D030): /* Cortex-A53 */ + /* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */ + return 2; + case UINT32_C(0x4100D040): /* Cortex-A35 */ +#if CPUINFO_ARCH_ARM + case UINT32_C(0x4100C070): /* Cortex-A7 */ +#endif /* CPUINFO_ARCH_ARM */ + case UINT32_C(0x51008050): /* Kryo 485 Silver */ + case UINT32_C(0x51008030): /* Kryo 385 Silver */ + case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */ + case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */ + case UINT32_C(0x51002010): /* Kryo Silver (Snapdragon 821) */ + /* These cores are always in LITTLE core */ + return 1; + default: + /* + * Unknown cores, or cores which do not have big/LITTLE roles. + * To be future-proof w.r.t. cores not yet recognized in cpuinfo, assume position between + * Cortex-A57/A72/A73/A75 and Cortex-A53/A55. Then at least future cores paired with + * one of these known cores will be properly scored. + */ + return 3; + } +} + +inline static uint32_t midr_little_core_for_big(uint32_t midr) { + const uint32_t core_mask = + CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK; + switch (midr & core_mask) { + case CPUINFO_ARM_MIDR_CORTEX_A75: + return CPUINFO_ARM_MIDR_CORTEX_A55; + case CPUINFO_ARM_MIDR_CORTEX_A73: + case CPUINFO_ARM_MIDR_CORTEX_A72: + case CPUINFO_ARM_MIDR_CORTEX_A57: + case CPUINFO_ARM_MIDR_EXYNOS_M1_M2: + return CPUINFO_ARM_MIDR_CORTEX_A53; + case CPUINFO_ARM_MIDR_CORTEX_A17: + case CPUINFO_ARM_MIDR_CORTEX_A15: + return CPUINFO_ARM_MIDR_CORTEX_A7; + case CPUINFO_ARM_MIDR_KRYO280_GOLD: + return CPUINFO_ARM_MIDR_KRYO280_SILVER; + case CPUINFO_ARM_MIDR_KRYO_GOLD: + return CPUINFO_ARM_MIDR_KRYO_SILVER_820; + case CPUINFO_ARM_MIDR_DENVER2: + return CPUINFO_ARM_MIDR_CORTEX_A57; + default: + return midr; + } +} diff --git a/dep/cpuinfo/src/arm/tlb.c b/dep/cpuinfo/src/arm/tlb.c new file mode 100644 index 000000000..9beb8327e --- /dev/null +++ b/dep/cpuinfo/src/arm/tlb.c @@ -0,0 +1,133 @@ + + +switch (uarch) { + case cpuinfo_uarch_cortex_a5: + /* + * Cortex-A5 Technical Reference Manual: + * 6.3.1. 
Micro TLB + * The first level of caching for the page table information is a micro TLB of + * 10 entries that is implemented on each of the instruction and data sides. + * 6.3.2. Main TLB + * Misses from the instruction and data micro TLBs are handled by a unified main TLB. + * The main TLB is 128-entry two-way set-associative. + */ + break; + case cpuinfo_uarch_cortex_a7: + /* + * Cortex-A7 MPCore Technical Reference Manual: + * 5.3.1. Micro TLB + * The first level of caching for the page table information is a micro TLB of + * 10 entries that is implemented on each of the instruction and data sides. + * 5.3.2. Main TLB + * Misses from the micro TLBs are handled by a unified main TLB. This is a 256-entry 2-way + * set-associative structure. The main TLB supports all the VMSAv7 page sizes of + * 4KB, 64KB, 1MB and 16MB in addition to the LPAE page sizes of 2MB and 1G. + */ + break; + case cpuinfo_uarch_cortex_a8: + /* + * Cortex-A8 Technical Reference Manual: + * 6.1. About the MMU + * The MMU features include the following: + * - separate, fully-associative, 32-entry data and instruction TLBs + * - TLB entries that support 4KB, 64KB, 1MB, and 16MB pages + */ + break; + case cpuinfo_uarch_cortex_a9: + /* + * ARM Cortex‑A9 Technical Reference Manual: + * 6.2.1 Micro TLB + * The first level of caching for the page table information is a micro TLB of 32 entries on the data side, + * and configurable 32 or 64 entries on the instruction side. + * 6.2.2 Main TLB + * The main TLB is implemented as a combination of: + * - A fully-associative, lockable array of four elements. + * - A 2-way associative structure of 2x32, 2x64, 2x128 or 2x256 entries. + */ + break; + case cpuinfo_uarch_cortex_a15: + /* + * ARM Cortex-A15 MPCore Processor Technical Reference Manual: + * 5.2.1. L1 instruction TLB + * The L1 instruction TLB is a 32-entry fully-associative structure. This TLB caches entries at the 4KB + * granularity of Virtual Address (VA) to Physical Address (PA) mapping only. If the page tables map the + * memory region to a larger granularity than 4K, it only allocates one mapping for the particular 4K region + * to which the current access corresponds. + * 5.2.2. L1 data TLB + * There are two separate 32-entry fully-associative TLBs that are used for data loads and stores, + * respectively. Similar to the L1 instruction TLB, both of these cache entries at the 4KB granularity of + * VA to PA mappings only. At implementation time, the Cortex-A15 MPCore processor can be configured with + * the -l1tlb_1m option, to have the L1 data TLB cache entries at both the 4KB and 1MB granularity. + * With this configuration, any translation that results in a 1MB or larger page is cached in the L1 data + * TLB as a 1MB entry. Any translation that results in a page smaller than 1MB is cached in the L1 data TLB + * as a 4KB entry. By default, all translations are cached in the L1 data TLB as a 4KB entry. + * 5.2.3. L2 TLB + * Misses from the L1 instruction and data TLBs are handled by a unified L2 TLB. This is a 512-entry 4-way + * set-associative structure. The L2 TLB supports all the VMSAv7 page sizes of 4K, 64K, 1MB and 16MB in + * addition to the LPAE page sizes of 2MB and 1GB. + */ + break; + case cpuinfo_uarch_cortex_a17: + /* + * ARM Cortex-A17 MPCore Processor Technical Reference Manual: + * 5.2.1. Instruction micro TLB + * The instruction micro TLB is implemented as a 32, 48 or 64 entry, fully-associative structure. 
This TLB + * caches entries at the 4KB and 1MB granularity of Virtual Address (VA) to Physical Address (PA) mapping + * only. If the translation tables map the memory region to a larger granularity than 4KB or 1MB, it only + * allocates one mapping for the particular 4KB region to which the current access corresponds. + * 5.2.2. Data micro TLB + * The data micro TLB is a 32 entry fully-associative TLB that is used for data loads and stores. The cache + * entries have a 4KB and 1MB granularity of VA to PA mappings only. + * 5.2.3. Unified main TLB + * Misses from the instruction and data micro TLBs are handled by a unified main TLB. This is a 1024 entry + * 4-way set-associative structure. The main TLB supports all the VMSAv7 page sizes of 4K, 64K, 1MB and 16MB + * in addition to the LPAE page sizes of 2MB and 1GB. + */ + break; + case cpuinfo_uarch_cortex_a35: + /* + * ARM Cortex‑A35 Processor Technical Reference Manual: + * A6.2 TLB Organization + * Micro TLB + * The first level of caching for the translation table information is a micro TLB of ten entries that + * is implemented on each of the instruction and data sides. + * Main TLB + * A unified main TLB handles misses from the micro TLBs. It has a 512-entry, 2-way, set-associative + * structure and supports all VMSAv8 block sizes, except 1GB. If it fetches a 1GB block, the TLB splits + * it into 512MB blocks and stores the appropriate block for the lookup. + */ + break; + case cpuinfo_uarch_cortex_a53: + /* + * ARM Cortex-A53 MPCore Processor Technical Reference Manual: + * 5.2.1. Micro TLB + * The first level of caching for the translation table information is a micro TLB of ten entries that is + * implemented on each of the instruction and data sides. + * 5.2.2. Main TLB + * A unified main TLB handles misses from the micro TLBs. This is a 512-entry, 4-way, set-associative + * structure. The main TLB supports all VMSAv8 block sizes, except 1GB. If a 1GB block is fetched, it is + * split into 512MB blocks and the appropriate block for the lookup stored. + */ + break; + case cpuinfo_uarch_cortex_a57: + /* + * ARM® Cortex-A57 MPCore Processor Technical Reference Manual: + * 5.2.1 L1 instruction TLB + * The L1 instruction TLB is a 48-entry fully-associative structure. This TLB caches entries of three + * different page sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings. If the page tables map the memory + * region to a larger granularity than 1MB, it only allocates one mapping for the particular 1MB region to + * which the current access corresponds. + * 5.2.2 L1 data TLB + * The L1 data TLB is a 32-entry fully-associative TLB that is used for data loads and stores. This TLB + * caches entries of three different page sizes, natively 4KB, 64KB, and 1MB, of VA to PA mappings. + * 5.2.3 L2 TLB + * Misses from the L1 instruction and data TLBs are handled by a unified L2 TLB. This is a 1024-entry 4-way + * set-associative structure. The L2 TLB supports the page sizes of 4K, 64K, 1MB and 16MB. It also supports + * page sizes of 2MB and 1GB for the long descriptor format translation in AArch32 state and in AArch64 state + * when using the 4KB translation granule. In addition, the L2 TLB supports the 512MB page map size defined + * for the AArch64 translations that use a 64KB translation granule. 
+ */ + break; +} + + diff --git a/dep/cpuinfo/src/arm/uarch.c b/dep/cpuinfo/src/arm/uarch.c new file mode 100644 index 000000000..6677d90a9 --- /dev/null +++ b/dep/cpuinfo/src/arm/uarch.c @@ -0,0 +1,367 @@ +#include + +#include +#include +#include + + +void cpuinfo_arm_decode_vendor_uarch( + uint32_t midr, +#if CPUINFO_ARCH_ARM + bool has_vfpv4, +#endif /* CPUINFO_ARCH_ARM */ +#ifndef _MSC_VER + enum cpuinfo_vendor vendor[restrict static 1], + enum cpuinfo_uarch uarch[restrict static 1]) +#else + enum cpuinfo_vendor vendor[1], + enum cpuinfo_uarch uarch[1]) +#endif +{ + switch (midr_get_implementer(midr)) { + case 'A': + *vendor = cpuinfo_vendor_arm; + switch (midr_get_part(midr)) { +#if CPUINFO_ARCH_ARM + case 0xC05: + *uarch = cpuinfo_uarch_cortex_a5; + break; + case 0xC07: + *uarch = cpuinfo_uarch_cortex_a7; + break; + case 0xC08: + *uarch = cpuinfo_uarch_cortex_a8; + break; + case 0xC09: + *uarch = cpuinfo_uarch_cortex_a9; + break; + case 0xC0C: + *uarch = cpuinfo_uarch_cortex_a12; + break; + case 0xC0E: + *uarch = cpuinfo_uarch_cortex_a17; + break; + case 0xC0D: + /* + * Rockchip RK3288 only. + * Core information is ambiguous: some sources specify Cortex-A12, others - Cortex-A17. + * Assume it is Cortex-A12. + */ + *uarch = cpuinfo_uarch_cortex_a12; + break; + case 0xC0F: + *uarch = cpuinfo_uarch_cortex_a15; + break; +#endif /* CPUINFO_ARCH_ARM */ + case 0xD01: + *uarch = cpuinfo_uarch_cortex_a32; + break; + case 0xD03: + *uarch = cpuinfo_uarch_cortex_a53; + break; + case 0xD04: + *uarch = cpuinfo_uarch_cortex_a35; + break; + case 0xD05: + // Note: use Variant, not Revision, field + *uarch = (midr & CPUINFO_ARM_MIDR_VARIANT_MASK) == 0 ? + cpuinfo_uarch_cortex_a55r0 : cpuinfo_uarch_cortex_a55; + break; + case 0xD06: + *uarch = cpuinfo_uarch_cortex_a65; + break; + case 0xD07: + *uarch = cpuinfo_uarch_cortex_a57; + break; + case 0xD08: + *uarch = cpuinfo_uarch_cortex_a72; + break; + case 0xD09: + *uarch = cpuinfo_uarch_cortex_a73; + break; + case 0xD0A: + *uarch = cpuinfo_uarch_cortex_a75; + break; + case 0xD0B: + *uarch = cpuinfo_uarch_cortex_a76; + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD0C: + *uarch = cpuinfo_uarch_neoverse_n1; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ + case 0xD0D: + *uarch = cpuinfo_uarch_cortex_a77; + break; + case 0xD0E: /* Cortex-A76AE */ + *uarch = cpuinfo_uarch_cortex_a76; + break; + case 0xD41: /* Cortex-A78 */ + *uarch = cpuinfo_uarch_cortex_a78; + break; + case 0xD44: /* Cortex-X1 */ + *uarch = cpuinfo_uarch_cortex_x1; + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD4A: + *uarch = cpuinfo_uarch_neoverse_e1; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ + default: + switch (midr_get_part(midr) >> 8) { +#if CPUINFO_ARCH_ARM + case 7: + *uarch = cpuinfo_uarch_arm7; + break; + case 9: + *uarch = cpuinfo_uarch_arm9; + break; + case 11: + *uarch = cpuinfo_uarch_arm11; + break; +#endif /* CPUINFO_ARCH_ARM */ + default: + cpuinfo_log_warning("unknown ARM CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + } + break; + case 'B': + *vendor = cpuinfo_vendor_broadcom; + switch (midr_get_part(midr)) { + case 0x00F: + *uarch = cpuinfo_uarch_brahma_b15; + break; + case 0x100: + *uarch = cpuinfo_uarch_brahma_b53; + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0x516: + /* Broadcom Vulkan was sold to Cavium before it reached the market, so we identify it as Cavium ThunderX2 */ + *vendor = cpuinfo_vendor_cavium; + *uarch = cpuinfo_uarch_thunderx2; + break; 
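+			/*
+			 * For reference, every part number matched in this file is extracted with the
+			 * accessors from src/arm/midr.h. Worked example with a hypothetical MIDR value
+			 * (not read from any real register here):
+			 *
+			 *   uint32_t midr = UINT32_C(0x410FD083);       // Cortex-A72 r0p3
+			 *   midr_get_implementer(midr) -> 0x41  ('A', ARM Ltd.)
+			 *   midr_get_variant(midr)     -> 0x0   (r0)
+			 *   midr_get_part(midr)        -> 0xD08 (decoded as cpuinfo_uarch_cortex_a72)
+			 *   midr_get_revision(midr)    -> 0x3   (p3)
+			 */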
+#endif + default: + cpuinfo_log_warning("unknown Broadcom CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 'C': + *vendor = cpuinfo_vendor_cavium; + switch (midr_get_part(midr)) { + case 0x0A0: /* ThunderX */ + case 0x0A1: /* ThunderX 88XX */ + case 0x0A2: /* ThunderX 81XX */ + case 0x0A3: /* ThunderX 83XX */ + *uarch = cpuinfo_uarch_thunderx; + break; + case 0x0AF: /* ThunderX2 99XX */ + *uarch = cpuinfo_uarch_thunderx2; + break; + default: + cpuinfo_log_warning("unknown Cavium CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#endif + case 'H': + *vendor = cpuinfo_vendor_huawei; + switch (midr_get_part(midr)) { +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xD01: /* Kunpeng 920 series */ + *uarch = cpuinfo_uarch_taishan_v110; + break; +#endif + case 0xD40: /* Kirin 980 Big/Medium cores -> Cortex-A76 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a76; + break; + default: + cpuinfo_log_warning("unknown Huawei CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#if CPUINFO_ARCH_ARM + case 'i': + *vendor = cpuinfo_vendor_intel; + switch (midr_get_part(midr) >> 8) { + case 2: /* PXA 210/25X/26X */ + case 4: /* PXA 27X */ + case 6: /* PXA 3XX */ + *uarch = cpuinfo_uarch_xscale; + break; + default: + cpuinfo_log_warning("unknown Intel CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#endif /* CPUINFO_ARCH_ARM */ + case 'N': + *vendor = cpuinfo_vendor_nvidia; + switch (midr_get_part(midr)) { + case 0x000: + *uarch = cpuinfo_uarch_denver; + break; + case 0x003: + *uarch = cpuinfo_uarch_denver2; + break; + case 0x004: + *uarch = cpuinfo_uarch_carmel; + break; + default: + cpuinfo_log_warning("unknown Nvidia CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#if !defined(__ANDROID__) + case 'P': + *vendor = cpuinfo_vendor_apm; + switch (midr_get_part(midr)) { + case 0x000: + *uarch = cpuinfo_uarch_xgene; + break; + default: + cpuinfo_log_warning("unknown Applied Micro CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#endif + case 'Q': + *vendor = cpuinfo_vendor_qualcomm; + switch (midr_get_part(midr)) { +#if CPUINFO_ARCH_ARM + case 0x00F: + /* Mostly Scorpions, but some Cortex A5 may report this value as well */ + if (has_vfpv4) { + /* Unlike Scorpion, Cortex-A5 comes with VFPv4 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a5; + } else { + *uarch = cpuinfo_uarch_scorpion; + } + break; + case 0x02D: /* Dual-core Scorpions */ + *uarch = cpuinfo_uarch_scorpion; + break; + case 0x04D: + /* + * Dual-core Krait: + * - r1p0 -> Krait 200 + * - r1p4 -> Krait 200 + * - r2p0 -> Krait 300 + */ + case 0x06F: + /* + * Quad-core Krait: + * - r0p1 -> Krait 200 + * - r0p2 -> Krait 200 + * - r1p0 -> Krait 300 + * - r2p0 -> Krait 400 (Snapdragon 800 MSMxxxx) + * - r2p1 -> Krait 400 (Snapdragon 801 MSMxxxxPRO) + * - r3p1 -> Krait 450 + */ + *uarch = cpuinfo_uarch_krait; + break; +#endif /* CPUINFO_ARCH_ARM */ + case 0x201: /* Qualcomm Snapdragon 821: Low-power Kryo "Silver" */ + case 0x205: /* Qualcomm Snapdragon 820 & 821: High-performance Kryo "Gold" */ + case 0x211: /* Qualcomm Snapdragon 820: Low-power Kryo "Silver" */ + *uarch = cpuinfo_uarch_kryo; + break; + case 0x800: /* High-performance Kryo 260 (r10p2) / Kryo 280 (r10p1) "Gold" -> Cortex-A73 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a73; + break; + case 0x801: /* Low-power Kryo 260 / 280 "Silver" -> Cortex-A53 */ + 
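+		/*
+		 * As with the 0x800 case above, the Kryo 2xx/3xx/4xx "Gold"/"Silver" parts
+		 * (0x800-0x805) are derivatives of standard Cortex cores, so both the vendor
+		 * and the uarch are reported as the underlying ARM design.
+		 */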
*vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a53; + break; + case 0x802: /* High-performance Kryo 385 "Gold" -> Cortex-A75 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a75; + break; + case 0x803: /* Low-power Kryo 385 "Silver" -> Cortex-A55r0 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a55r0; + break; + case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a76; + break; + case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */ + *vendor = cpuinfo_vendor_arm; + *uarch = cpuinfo_uarch_cortex_a55; + break; +#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) + case 0xC00: + *uarch = cpuinfo_uarch_falkor; + break; + case 0xC01: + *uarch = cpuinfo_uarch_saphira; + break; +#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */ + default: + cpuinfo_log_warning("unknown Qualcomm CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; + case 'S': + *vendor = cpuinfo_vendor_samsung; + switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { + case 0x00100010: + /* + * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has: + * - CPU variant 0x1 + * - CPU part 0x001 + */ + *uarch = cpuinfo_uarch_exynos_m1; + break; + case 0x00400010: + /* + * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has: + * - CPU variant 0x4 + * - CPU part 0x001 + */ + *uarch = cpuinfo_uarch_exynos_m2; + break; + case 0x00100020: + /* + * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has: + * - CPU variant 0x1 + * - CPU part 0x002 + */ + *uarch = cpuinfo_uarch_exynos_m3; + break; + case 0x00100030: + /* + * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has: + * - CPU variant 0x1 + * - CPU part 0x003 + */ + *uarch = cpuinfo_uarch_exynos_m4; + break; + case 0x00100040: + /* + * Exynos 9820 MIDR = 0x531F0040, assume Exynos M5 has: + * - CPU variant 0x1 + * - CPU part 0x004 + */ + *uarch = cpuinfo_uarch_exynos_m5; + break; + default: + cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored", + midr_get_variant(midr), midr_get_part(midr)); + } + break; +#if CPUINFO_ARCH_ARM + case 'V': + *vendor = cpuinfo_vendor_marvell; + switch (midr_get_part(midr)) { + case 0x581: /* PJ4 / PJ4B */ + case 0x584: /* PJ4B-MP / PJ4C */ + *uarch = cpuinfo_uarch_pj4; + break; + default: + cpuinfo_log_warning("unknown Marvell CPU part 0x%03"PRIx32" ignored", midr_get_part(midr)); + } + break; +#endif /* CPUINFO_ARCH_ARM */ + default: + cpuinfo_log_warning("unknown CPU implementer '%c' (0x%02"PRIx32") with CPU part 0x%03"PRIx32" ignored", + (char) midr_get_implementer(midr), midr_get_implementer(midr), midr_get_part(midr)); + } +} diff --git a/dep/cpuinfo/src/arm/windows/api.h b/dep/cpuinfo/src/arm/windows/api.h new file mode 100644 index 000000000..33d917e0d --- /dev/null +++ b/dep/cpuinfo/src/arm/windows/api.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +#include +#include + +struct cpuinfo_arm_linux_processor { + /** + * Minimum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + uint32_t package_leader_id; + /** + * Minimum processor ID on the core which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. 
+ */ + /** + * Number of logical processors in the package. + */ + uint32_t package_processor_count; + /** + * Maximum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu/cpufreq/cpuinfo_max_freq + * If failed to read or parse the file, the value is 0. + */ + uint32_t max_frequency; + /** + * Minimum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu/cpufreq/cpuinfo_min_freq + * If failed to read or parse the file, the value is 0. + */ + uint32_t min_frequency; + /** Linux processor ID */ + uint32_t system_processor_id; + uint32_t flags; +}; diff --git a/dep/cpuinfo/src/arm/windows/init.c b/dep/cpuinfo/src/arm/windows/init.c new file mode 100644 index 000000000..0bdb604b9 --- /dev/null +++ b/dep/cpuinfo/src/arm/windows/init.c @@ -0,0 +1,334 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef __GNUC__ + #define CPUINFO_ALLOCA __builtin_alloca +#else + #define CPUINFO_ALLOCA _alloca +#endif + + +static inline uint32_t bit_mask(uint32_t bits) { + return (UINT32_C(1) << bits) - UINT32_C(1); +} + +static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) { + #if defined(_M_ARM64) + unsigned long index; + _BitScanForward64(&index, (unsigned __int64) kaffinity); + return (uint32_t) index; + #elif defined(_M_ARM) + unsigned long index; + _BitScanForward(&index, (unsigned long) kaffinity); + return (uint32_t) index; + #else + #error Platform-specific implementation required + #endif +} + +static bool cpuinfo_arm_windows_is_wine(void) { + HMODULE ntdll = GetModuleHandleW(L"ntdll.dll"); + if (ntdll == NULL) { + return false; + } + + return GetProcAddress(ntdll, "wine_get_version") != NULL; +} + +BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_package* packages = NULL; + uint32_t* core_efficiency_classes = NULL; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX processor_infos = NULL; + + HANDLE heap = GetProcessHeap(); + const bool is_wine = cpuinfo_arm_windows_is_wine(); + + /* WINE doesn't implement GetMaximumProcessorGroupCount and aborts when calling it */ + const uint32_t max_group_count = is_wine ? 
1 : (uint32_t) GetMaximumProcessorGroupCount(); + cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count); + + uint32_t processors_count = 0; + uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t)); + for (uint32_t i = 0; i < max_group_count; i++) { + processors_per_group[i] = GetMaximumProcessorCount((WORD) i); + cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32, + processors_per_group[i], i); + processors_count += processors_per_group[i]; + } + + uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t)); + for (uint32_t i = 0, count = 0; i < max_group_count; i++) { + processors_before_group[i] = count; + cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32, + processors_before_group[i], i); + count += processors_per_group[i]; + } + + processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, processors_count * sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + processors_count * sizeof(struct cpuinfo_processor), processors_count); + goto cleanup; + } + + DWORD cores_info_size = 0; + if (GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &cores_info_size) == FALSE) { + const DWORD last_error = GetLastError(); + if (last_error != ERROR_INSUFFICIENT_BUFFER) { + cpuinfo_log_error("failed to query size of processor cores information: error %"PRIu32, + (uint32_t) last_error); + goto cleanup; + } + } + + DWORD packages_info_size = 0; + if (GetLogicalProcessorInformationEx(RelationProcessorPackage, NULL, &packages_info_size) == FALSE) { + const DWORD last_error = GetLastError(); + if (last_error != ERROR_INSUFFICIENT_BUFFER) { + cpuinfo_log_error("failed to query size of processor packages information: error %"PRIu32, + (uint32_t) last_error); + goto cleanup; + } + } + + DWORD max_info_size = max(cores_info_size, packages_info_size); + + processor_infos = HeapAlloc(heap, 0, max_info_size); + if (processor_infos == NULL) { + cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information", + (uint32_t) max_info_size); + goto cleanup; + } + + if (GetLogicalProcessorInformationEx(RelationProcessorPackage, processor_infos, &max_info_size) == FALSE) { + cpuinfo_log_error("failed to query processor packages information: error %"PRIu32, + (uint32_t) GetLastError()); + goto cleanup; + } + + uint32_t packages_count = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX packages_info_end = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + packages_info_size); + for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX package_info = processor_infos; + package_info < packages_info_end; + package_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) package_info + package_info->Size)) + { + if (package_info->Relationship != RelationProcessorPackage) { + cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor package information", + (uint32_t) package_info->Relationship); + continue; + } + + /* We assume that packages are reported in APIC order */ + const uint32_t package_id = packages_count++; + /* Iterate processor groups and set the package part of APIC ID */ + for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) { + const uint32_t group_id = package_info->Processor.GroupMask[i].Group; + /* Global index of the first logical processor belonging to this group */ + const uint32_t 
group_processors_start = processors_before_group[group_id]; + /* Bitmask representing processors in this group belonging to this package */ + KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask; + while (group_processors_mask != 0) { + const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask); + const uint32_t processor_id = group_processors_start + group_processor_id; + processors[processor_id].package = (const struct cpuinfo_package*) NULL + package_id; + processors[processor_id].windows_group_id = (uint16_t) group_id; + processors[processor_id].windows_processor_id = (uint16_t) group_processor_id; + + /* Reset the lowest bit in affinity mask */ + group_processors_mask &= (group_processors_mask - 1); + } + } + } + + max_info_size = max(cores_info_size, packages_info_size); + if (GetLogicalProcessorInformationEx(RelationProcessorCore, processor_infos, &max_info_size) == FALSE) { + cpuinfo_log_error("failed to query processor cores information: error %"PRIu32, + (uint32_t) GetLastError()); + goto cleanup; + } + + uint32_t cores_count = 0; + /* Index (among all cores) of the the first core on the current package */ + uint32_t package_core_start = 0; + uint32_t current_package_apic_id = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX cores_info_end = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + cores_info_size); + for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info = processor_infos; + core_info < cores_info_end; + core_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) core_info + core_info->Size)) + { + if (core_info->Relationship != RelationProcessorCore) { + cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor core information", + (uint32_t) core_info->Relationship); + continue; + } + + /* We assume that cores and logical processors are reported in APIC order */ + const uint32_t core_id = cores_count++; + if (core_efficiency_classes == NULL) + core_efficiency_classes = (uint32_t*)HeapAlloc(heap, HEAP_ZERO_MEMORY, sizeof(uint32_t) * cores_count); + else + core_efficiency_classes = (uint32_t*)HeapReAlloc(heap, HEAP_ZERO_MEMORY, core_efficiency_classes, sizeof(uint32_t) * cores_count); + core_efficiency_classes[core_id] = core_info->Processor.EfficiencyClass; + + uint32_t smt_id = 0; + /* Iterate processor groups and set the core & SMT parts of APIC ID */ + for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) { + const uint32_t group_id = core_info->Processor.GroupMask[i].Group; + /* Global index of the first logical processor belonging to this group */ + const uint32_t group_processors_start = processors_before_group[group_id]; + /* Bitmask representing processors in this group belonging to this package */ + KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask; + while (group_processors_mask != 0) { + const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask); + const uint32_t processor_id = group_processors_start + group_processor_id; + + /* Core ID w.r.t package */ + const uint32_t package_core_id = core_id - package_core_start; + + /* Set SMT ID (assume logical processors within the core are reported in APIC order) */ + processors[processor_id].smt_id = smt_id++; + processors[processor_id].core = (const struct cpuinfo_core*) NULL + core_id; + + /* Reset the lowest bit in affinity mask */ + group_processors_mask &= (group_processors_mask - 1); + } + } + } + + cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, cores_count * 
sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + cores_count * sizeof(struct cpuinfo_core), cores_count); + goto cleanup; + } + + clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + packages_count * sizeof(struct cpuinfo_cluster), packages_count); + goto cleanup; + } + + packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages", + packages_count * sizeof(struct cpuinfo_package), packages_count); + goto cleanup; + } + + for (uint32_t i = processors_count; i != 0; i--) { + const uint32_t processor_id = i - 1; + struct cpuinfo_processor* processor = processors + processor_id; + + /* Adjust core and package pointers for all logical processors */ + struct cpuinfo_core* core = + (struct cpuinfo_core*) ((uintptr_t) cores + (uintptr_t) processor->core); + processor->core = core; + struct cpuinfo_cluster* cluster = + (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster); + processor->cluster = cluster; + struct cpuinfo_package* package = + (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package); + processor->package = package; + + /* This can be overwritten by lower-index processors on the same package */ + package->processor_start = processor_id; + package->processor_count += 1; + + /* This can be overwritten by lower-index processors on the same cluster */ + cluster->processor_start = processor_id; + cluster->processor_count += 1; + + /* This can be overwritten by lower-index processors on the same core*/ + core->processor_start = processor_id; + core->processor_count += 1; + } + + /* Set vendor/uarch/CPUID information for cores */ + for (uint32_t i = cores_count; i != 0; i--) { + const uint32_t global_core_id = i - 1; + struct cpuinfo_core* core = cores + global_core_id; + const struct cpuinfo_processor* processor = processors + core->processor_start; + struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package; + struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster; + + core->cluster = cluster; + core->package = package; + core->core_id = global_core_id; + core->vendor = cpuinfo_vendor_unknown; + core->uarch = cpuinfo_uarch_unknown; + + /* Lazy */ + core->frequency = core_efficiency_classes[global_core_id]; + + /* This can be overwritten by lower-index cores on the same cluster/package */ + cluster->core_start = global_core_id; + cluster->core_count += 1; + package->core_start = global_core_id; + package->core_count += 1; + } + + for (uint32_t i = 0; i < packages_count; i++) { + struct cpuinfo_package* package = packages + i; + struct cpuinfo_cluster* cluster = clusters + i; + + cluster->package = package; + cluster->vendor = cores[cluster->core_start].vendor; + cluster->uarch = cores[cluster->core_start].uarch; + package->cluster_start = i; + package->cluster_count = 1; + } + + + /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = packages; + + cpuinfo_processors_count = processors_count; + cpuinfo_cores_count = cores_count; + cpuinfo_clusters_count = packages_count; + cpuinfo_packages_count = 
packages_count; + + MemoryBarrier(); + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + packages = NULL; + +cleanup: + if (core_efficiency_classes != NULL) { + HeapFree(heap, 0, core_efficiency_classes); + } + if (processors != NULL) { + HeapFree(heap, 0, processors); + } + if (cores != NULL) { + HeapFree(heap, 0, cores); + } + if (clusters != NULL) { + HeapFree(heap, 0, clusters); + } + if (packages != NULL) { + HeapFree(heap, 0, packages); + } + return TRUE; +} diff --git a/dep/cpuinfo/src/cache.c b/dep/cpuinfo/src/cache.c new file mode 100644 index 000000000..b976b8796 --- /dev/null +++ b/dep/cpuinfo/src/cache.c @@ -0,0 +1,18 @@ +#include + +#include +#include + + +uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) { + if (processor->cache.l4 != NULL) { + return processor->cache.l4->size; + } else if (processor->cache.l3 != NULL) { + return processor->cache.l3->size; + } else if (processor->cache.l2 != NULL) { + return processor->cache.l2->size; + } else if (processor->cache.l1d != NULL) { + return processor->cache.l1d->size; + } + return 0; +} diff --git a/dep/cpuinfo/src/cpuinfo/common.h b/dep/cpuinfo/src/cpuinfo/common.h new file mode 100644 index 000000000..b2b404d74 --- /dev/null +++ b/dep/cpuinfo/src/cpuinfo/common.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + + +#define CPUINFO_COUNT_OF(array) (sizeof(array) / sizeof(0[array])) + +#if defined(__GNUC__) + #define CPUINFO_LIKELY(condition) (__builtin_expect(!!(condition), 1)) + #define CPUINFO_UNLIKELY(condition) (__builtin_expect(!!(condition), 0)) +#else + #define CPUINFO_LIKELY(condition) (!!(condition)) + #define CPUINFO_UNLIKELY(condition) (!!(condition)) +#endif + +#ifndef CPUINFO_INTERNAL + #if defined(__ELF__) + #define CPUINFO_INTERNAL __attribute__((__visibility__("internal"))) + #elif defined(__MACH__) + #define CPUINFO_INTERNAL __attribute__((__visibility__("hidden"))) + #else + #define CPUINFO_INTERNAL + #endif +#endif + +#ifndef CPUINFO_PRIVATE + #if defined(__ELF__) + #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) + #elif defined(__MACH__) + #define CPUINFO_PRIVATE __attribute__((__visibility__("hidden"))) + #else + #define CPUINFO_PRIVATE + #endif +#endif diff --git a/dep/cpuinfo/src/cpuinfo/internal-api.h b/dep/cpuinfo/src/cpuinfo/internal-api.h new file mode 100644 index 000000000..79f6ad4ce --- /dev/null +++ b/dep/cpuinfo/src/cpuinfo/internal-api.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#if defined(_WIN32) || defined(__CYGWIN__) + #include +#endif + +#include +#include + + +enum cpuinfo_cache_level { + cpuinfo_cache_level_1i = 0, + cpuinfo_cache_level_1d = 1, + cpuinfo_cache_level_2 = 2, + cpuinfo_cache_level_3 = 3, + cpuinfo_cache_level_4 = 4, + cpuinfo_cache_level_max = 5, +}; + +extern CPUINFO_INTERNAL bool cpuinfo_is_initialized; + +extern CPUINFO_INTERNAL struct cpuinfo_processor* cpuinfo_processors; +extern CPUINFO_INTERNAL struct cpuinfo_core* cpuinfo_cores; +extern CPUINFO_INTERNAL struct cpuinfo_cluster* cpuinfo_clusters; +extern CPUINFO_INTERNAL struct cpuinfo_package* cpuinfo_packages; +extern CPUINFO_INTERNAL struct cpuinfo_cache* cpuinfo_cache[cpuinfo_cache_level_max]; + +extern CPUINFO_INTERNAL uint32_t cpuinfo_processors_count; +extern CPUINFO_INTERNAL uint32_t 
cpuinfo_cores_count; +extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count; +extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count; +extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max]; +extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size; + +#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + extern CPUINFO_INTERNAL struct cpuinfo_uarch_info* cpuinfo_uarchs; + extern CPUINFO_INTERNAL uint32_t cpuinfo_uarchs_count; +#else + extern CPUINFO_INTERNAL struct cpuinfo_uarch_info cpuinfo_global_uarch; +#endif + +#ifdef __linux__ + extern CPUINFO_INTERNAL uint32_t cpuinfo_linux_cpu_max; + extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map; + extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map; +#endif + +CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void); +CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void); +#if defined(_WIN32) || defined(__CYGWIN__) + #ifdef _M_ARM64 + CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #else + CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context); + #endif +#endif +CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void); +CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void); +CPUINFO_PRIVATE void cpuinfo_emscripten_init(void); + +CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor); + +typedef void (*cpuinfo_processor_callback)(uint32_t); diff --git a/dep/cpuinfo/src/cpuinfo/log.h b/dep/cpuinfo/src/cpuinfo/log.h new file mode 100644 index 000000000..dac8cdb66 --- /dev/null +++ b/dep/cpuinfo/src/cpuinfo/log.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +#include + +#define CPUINFO_LOG_DEBUG_PARSERS 0 + +#ifndef CPUINFO_LOG_LEVEL + #define CPUINFO_LOG_LEVEL CLOG_ERROR +#endif + +CLOG_DEFINE_LOG_DEBUG(cpuinfo_log_debug, "cpuinfo", CPUINFO_LOG_LEVEL); +CLOG_DEFINE_LOG_INFO(cpuinfo_log_info, "cpuinfo", CPUINFO_LOG_LEVEL); +CLOG_DEFINE_LOG_WARNING(cpuinfo_log_warning, "cpuinfo", CPUINFO_LOG_LEVEL); +CLOG_DEFINE_LOG_ERROR(cpuinfo_log_error, "cpuinfo", CPUINFO_LOG_LEVEL); +CLOG_DEFINE_LOG_FATAL(cpuinfo_log_fatal, "cpuinfo", CPUINFO_LOG_LEVEL); diff --git a/dep/cpuinfo/src/cpuinfo/utils.h b/dep/cpuinfo/src/cpuinfo/utils.h new file mode 100644 index 000000000..157baad9a --- /dev/null +++ b/dep/cpuinfo/src/cpuinfo/utils.h @@ -0,0 +1,19 @@ +#pragma once + +#include + + +inline static uint32_t bit_length(uint32_t n) { + const uint32_t n_minus_1 = n - 1; + if (n_minus_1 == 0) { + return 0; + } else { + #ifdef _MSC_VER + unsigned long bsr; + _BitScanReverse(&bsr, n_minus_1); + return bsr + 1; + #else + return 32 - __builtin_clz(n_minus_1); + #endif + } +} diff --git a/dep/cpuinfo/src/emscripten/init.c b/dep/cpuinfo/src/emscripten/init.c new file mode 100644 index 000000000..ce4bdea2e --- /dev/null +++ b/dep/cpuinfo/src/emscripten/init.c @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + + +static const volatile float infinity = INFINITY; + +static struct cpuinfo_package static_package = { }; + +static struct cpuinfo_cache static_x86_l3 = { + .size = 2 * 1024 * 1024, + .associativity = 16, + .sets = 2048, + .partitions = 1, + .line_size = 64, +}; + +void cpuinfo_emscripten_init(void) { + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* 
l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + + const bool is_x86 = signbit(infinity - infinity); + + int logical_cores_count = emscripten_num_logical_cores(); + if (logical_cores_count <= 0) { + logical_cores_count = 1; + } + uint32_t processor_count = (uint32_t) logical_cores_count; + uint32_t core_count = processor_count; + uint32_t cluster_count = 1; + uint32_t big_cluster_core_count = core_count; + uint32_t processors_per_core = 1; + if (is_x86) { + if (processor_count % 2 == 0) { + processors_per_core = 2; + core_count = processor_count / 2; + big_cluster_core_count = core_count; + } + } else { + /* Assume ARM/ARM64 */ + if (processor_count > 4) { + /* Assume big.LITTLE architecture */ + cluster_count = 2; + big_cluster_core_count = processor_count >= 8 ? 4 : 2; + } + } + uint32_t l2_count = is_x86 ? core_count : cluster_count; + + processors = calloc(processor_count, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + processor_count * sizeof(struct cpuinfo_processor), processor_count); + goto cleanup; + } + cores = calloc(processor_count, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + processor_count * sizeof(struct cpuinfo_core), processor_count); + goto cleanup; + } + clusters = calloc(cluster_count, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" clusters", + cluster_count * sizeof(struct cpuinfo_cluster), cluster_count); + goto cleanup; + } + + l1i = calloc(core_count, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + core_count * sizeof(struct cpuinfo_cache), core_count); + goto cleanup; + } + + l1d = calloc(core_count, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + core_count * sizeof(struct cpuinfo_cache), core_count); + goto cleanup; + } + + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + + static_package.processor_count = processor_count; + static_package.core_count = core_count; + static_package.cluster_count = cluster_count; + if (is_x86) { + strncpy(static_package.name, "x86 vCPU", CPUINFO_PACKAGE_NAME_MAX); + } else { + strncpy(static_package.name, "ARM vCPU", CPUINFO_PACKAGE_NAME_MAX); + } + + for (uint32_t i = 0; i < core_count; i++) { + for (uint32_t j = 0; j < processors_per_core; j++) { + processors[i * processors_per_core + j] = (struct cpuinfo_processor) { + .smt_id = j, + .core = cores + i, + .cluster = clusters + (uint32_t) (i >= big_cluster_core_count), + .package = &static_package, + .cache.l1i = l1i + i, + .cache.l1d = l1d + i, + .cache.l2 = is_x86 ? l2 + i : l2 + (uint32_t) (i >= big_cluster_core_count), + .cache.l3 = is_x86 ? 
&static_x86_l3 : NULL, + }; + } + + cores[i] = (struct cpuinfo_core) { + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + .core_id = i, + .cluster = clusters + (uint32_t) (i >= big_cluster_core_count), + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + + l1i[i] = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 64, + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + }; + + l1d[i] = (struct cpuinfo_cache) { + .size = 32 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 64, + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + }; + + if (is_x86) { + l2[i] = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 512, + .partitions = 1, + .line_size = 64, + .processor_start = i * processors_per_core, + .processor_count = processors_per_core, + }; + } + } + + if (is_x86) { + clusters[0] = (struct cpuinfo_cluster) { + .processor_start = 0, + .processor_count = processor_count, + .core_start = 0, + .core_count = core_count, + .cluster_id = 0, + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + + static_x86_l3.processor_count = processor_count; + } else { + clusters[0] = (struct cpuinfo_cluster) { + .processor_start = 0, + .processor_count = big_cluster_core_count, + .core_start = 0, + .core_count = big_cluster_core_count, + .cluster_id = 0, + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + + l2[0] = (struct cpuinfo_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .processor_start = 0, + .processor_count = big_cluster_core_count, + }; + + if (cluster_count > 1) { + l2[1] = (struct cpuinfo_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 512, + .partitions = 1, + .line_size = 64, + .processor_start = big_cluster_core_count, + .processor_count = processor_count - big_cluster_core_count, + }; + + clusters[1] = (struct cpuinfo_cluster) { + .processor_start = big_cluster_core_count, + .processor_count = processor_count - big_cluster_core_count, + .core_start = big_cluster_core_count, + .core_count = processor_count - big_cluster_core_count, + .cluster_id = 1, + .package = &static_package, + .vendor = cpuinfo_vendor_unknown, + .uarch = cpuinfo_uarch_unknown, + .frequency = 0, + }; + } + } + + /* Commit changes */ + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + if (is_x86) { + cpuinfo_cache[cpuinfo_cache_level_3] = &static_x86_l3; + } + + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = &static_package; + + cpuinfo_cache_count[cpuinfo_cache_level_1i] = processor_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = processor_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + if (is_x86) { + cpuinfo_cache_count[cpuinfo_cache_level_3] = 1; + } + + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = cpuinfo_uarch_unknown, + .processor_count = processor_count, + .core_count = core_count, + }; + + cpuinfo_processors_count = processor_count; + cpuinfo_cores_count = processor_count; + cpuinfo_clusters_count = 
cluster_count; + cpuinfo_packages_count = 1; + + cpuinfo_max_cache_size = is_x86 ? 128 * 1024 * 1024 : 8 * 1024 * 1024; + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + l1i = l1d = l2 = NULL; + +cleanup: + free(processors); + free(cores); + free(clusters); + free(l1i); + free(l1d); + free(l2); +} diff --git a/dep/cpuinfo/src/init.c b/dep/cpuinfo/src/init.c new file mode 100644 index 000000000..ed37c0794 --- /dev/null +++ b/dep/cpuinfo/src/init.c @@ -0,0 +1,61 @@ +#if defined(_WIN32) || defined(__CYGWIN__) + #include +#elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__) + #include +#endif + +#include +#include +#include + +#ifdef __APPLE__ + #include "TargetConditionals.h" +#endif + + +#if defined(_WIN32) || defined(__CYGWIN__) + static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT; +#elif !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__) + static pthread_once_t init_guard = PTHREAD_ONCE_INIT; +#else + static bool init_guard = false; +#endif + +bool CPUINFO_ABI cpuinfo_initialize(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + #if defined(__MACH__) && defined(__APPLE__) + pthread_once(&init_guard, &cpuinfo_x86_mach_init); + #elif defined(__linux__) + pthread_once(&init_guard, &cpuinfo_x86_linux_init); + #elif defined(_WIN32) || defined(__CYGWIN__) + InitOnceExecuteOnce(&init_guard, &cpuinfo_x86_windows_init, NULL, NULL); + #else + cpuinfo_log_error("operating system is not supported in cpuinfo"); + #endif +#elif CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + #if defined(__linux__) + pthread_once(&init_guard, &cpuinfo_arm_linux_init); + #elif defined(__MACH__) && defined(__APPLE__) + pthread_once(&init_guard, &cpuinfo_arm_mach_init); + #elif defined(_WIN32) + InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL); + #else + cpuinfo_log_error("operating system is not supported in cpuinfo"); + #endif +#elif CPUINFO_ARCH_ASMJS || CPUINFO_ARCH_WASM || CPUINFO_ARCH_WASMSIMD + #if defined(__EMSCRIPTEN_PTHREADS__) + pthread_once(&init_guard, &cpuinfo_emscripten_init); + #else + if (!init_guard) { + cpuinfo_emscripten_init(); + } + init_guard = true; + #endif +#else + cpuinfo_log_error("processor architecture is not supported in cpuinfo"); +#endif + return cpuinfo_is_initialized; +} + +void CPUINFO_ABI cpuinfo_deinitialize(void) { +} diff --git a/dep/cpuinfo/src/linux/api.h b/dep/cpuinfo/src/linux/api.h new file mode 100644 index 000000000..f55b8ac73 --- /dev/null +++ b/dep/cpuinfo/src/linux/api.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include + +#include +#include + + +#define CPUINFO_LINUX_FLAG_PRESENT UINT32_C(0x00000001) +#define CPUINFO_LINUX_FLAG_POSSIBLE UINT32_C(0x00000002) +#define CPUINFO_LINUX_FLAG_MAX_FREQUENCY UINT32_C(0x00000004) +#define CPUINFO_LINUX_FLAG_MIN_FREQUENCY UINT32_C(0x00000008) +#define CPUINFO_LINUX_FLAG_SMT_ID UINT32_C(0x00000010) +#define CPUINFO_LINUX_FLAG_CORE_ID UINT32_C(0x00000020) +#define CPUINFO_LINUX_FLAG_PACKAGE_ID UINT32_C(0x00000040) +#define CPUINFO_LINUX_FLAG_APIC_ID UINT32_C(0x00000080) +#define CPUINFO_LINUX_FLAG_SMT_CLUSTER UINT32_C(0x00000100) +#define CPUINFO_LINUX_FLAG_CORE_CLUSTER UINT32_C(0x00000200) +#define CPUINFO_LINUX_FLAG_PACKAGE_CLUSTER UINT32_C(0x00000400) +#define CPUINFO_LINUX_FLAG_PROC_CPUINFO UINT32_C(0x00000800) +#define CPUINFO_LINUX_FLAG_VALID UINT32_C(0x00001000) + + +typedef bool (*cpuinfo_cpulist_callback)(uint32_t, uint32_t, void*); +CPUINFO_INTERNAL bool cpuinfo_linux_parse_cpulist(const char* filename, 
cpuinfo_cpulist_callback callback, void* context); +typedef bool (*cpuinfo_smallfile_callback)(const char*, const char*, void*); +CPUINFO_INTERNAL bool cpuinfo_linux_parse_small_file(const char* filename, size_t buffer_size, cpuinfo_smallfile_callback, void* context); +typedef bool (*cpuinfo_line_callback)(const char*, const char*, void*, uint64_t); +CPUINFO_INTERNAL bool cpuinfo_linux_parse_multiline_file(const char* filename, size_t buffer_size, cpuinfo_line_callback, void* context); + +CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_processors_count(void); +CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count); +CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count); +CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor); +CPUINFO_INTERNAL uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor); +CPUINFO_INTERNAL bool cpuinfo_linux_get_processor_package_id(uint32_t processor, uint32_t package_id[restrict static 1]); +CPUINFO_INTERNAL bool cpuinfo_linux_get_processor_core_id(uint32_t processor, uint32_t core_id[restrict static 1]); + +CPUINFO_INTERNAL bool cpuinfo_linux_detect_possible_processors(uint32_t max_processors_count, + uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t possible_flag); +CPUINFO_INTERNAL bool cpuinfo_linux_detect_present_processors(uint32_t max_processors_count, + uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t present_flag); + +typedef bool (*cpuinfo_siblings_callback)(uint32_t, uint32_t, uint32_t, void*); +CPUINFO_INTERNAL bool cpuinfo_linux_detect_core_siblings( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context); +CPUINFO_INTERNAL bool cpuinfo_linux_detect_thread_siblings( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context); + +extern CPUINFO_INTERNAL const struct cpuinfo_processor** cpuinfo_linux_cpu_to_processor_map; +extern CPUINFO_INTERNAL const struct cpuinfo_core** cpuinfo_linux_cpu_to_core_map; diff --git a/dep/cpuinfo/src/linux/cpulist.c b/dep/cpuinfo/src/linux/cpulist.c new file mode 100644 index 000000000..287198638 --- /dev/null +++ b/dep/cpuinfo/src/linux/cpulist.c @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if CPUINFO_MOCK + #include +#endif +#include +#include + + +/* + * Size, in chars, of the on-stack buffer used for parsing cpu lists. + * This is also the limit on the length of a single entry + * ( or -) + * in the cpu list. 
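+ * (that is, a single CPU number, or an inclusive "first-last" range, as parsed by parse_entry below)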
+ */ +#define BUFFER_SIZE 256 + + +/* Locale-independent */ +inline static bool is_whitespace(char c) { + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': + return true; + default: + return false; + } +} + +inline static const char* parse_number(const char* string, const char* end, uint32_t number_ptr[restrict static 1]) { + uint32_t number = 0; + while (string != end) { + const uint32_t digit = (uint32_t) (*string) - (uint32_t) '0'; + if (digit >= 10) { + break; + } + number = number * UINT32_C(10) + digit; + string += 1; + } + *number_ptr = number; + return string; +} + +inline static bool parse_entry(const char* entry_start, const char* entry_end, cpuinfo_cpulist_callback callback, void* context) { + /* Skip whitespace at the beginning of an entry */ + for (; entry_start != entry_end; entry_start++) { + if (!is_whitespace(*entry_start)) { + break; + } + } + /* Skip whitespace at the end of an entry */ + for (; entry_end != entry_start; entry_end--) { + if (!is_whitespace(entry_end[-1])) { + break; + } + } + + const size_t entry_length = (size_t) (entry_end - entry_start); + if (entry_length == 0) { + cpuinfo_log_warning("unexpected zero-length cpu list entry ignored"); + return false; + } + + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("parse cpu list entry \"%.*s\" (%zu chars)", (int) entry_length, entry_start, entry_length); + #endif + uint32_t first_cpu, last_cpu; + + const char* number_end = parse_number(entry_start, entry_end, &first_cpu); + if (number_end == entry_start) { + /* Failed to parse the number; ignore the entry */ + cpuinfo_log_warning("invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored", + entry_start[0], (int) entry_length, entry_start); + return false; + } else if (number_end == entry_end) { + /* Completely parsed the entry */ + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("cpulist: call callback with list_start = %"PRIu32", list_end = %"PRIu32, + first_cpu, first_cpu + 1); + #endif + return callback(first_cpu, first_cpu + 1, context); + } + + /* Parse the second part of the entry */ + if (*number_end != '-') { + cpuinfo_log_warning("invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored", + *number_end, (int) entry_length, entry_start); + return false; + } + + const char* number_start = number_end + 1; + number_end = parse_number(number_start, entry_end, &last_cpu); + if (number_end == number_start) { + /* Failed to parse the second number; ignore the entry */ + cpuinfo_log_warning("invalid character '%c' in the cpu list entry \"%.*s\": entry is ignored", + *number_start, (int) entry_length, entry_start); + return false; + } + + if (number_end != entry_end) { + /* Partially parsed the entry; ignore unparsed characters and continue with the parsed part */ + cpuinfo_log_warning("ignored invalid characters \"%.*s\" at the end of cpu list entry \"%.*s\"", + (int) (entry_end - number_end), number_start, (int) entry_length, entry_start); + } + + if (last_cpu < first_cpu) { + cpuinfo_log_warning("ignored cpu list entry \"%.*s\": invalid range %"PRIu32"-%"PRIu32, + (int) entry_length, entry_start, first_cpu, last_cpu); + return false; + } + + /* Parsed both parts of the entry; update CPU set */ + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("cpulist: call callback with list_start = %"PRIu32", list_end = %"PRIu32, + first_cpu, last_cpu + 1); + #endif + return callback(first_cpu, last_cpu + 1, context); +} + +bool cpuinfo_linux_parse_cpulist(const char* filename, cpuinfo_cpulist_callback callback, void* 
context) { + bool status = true; + int file = -1; + char buffer[BUFFER_SIZE]; + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("parsing cpu list from file %s", filename); + #endif + +#if CPUINFO_MOCK + file = cpuinfo_mock_open(filename, O_RDONLY); +#else + file = open(filename, O_RDONLY); +#endif + if (file == -1) { + cpuinfo_log_info("failed to open %s: %s", filename, strerror(errno)); + status = false; + goto cleanup; + } + + size_t position = 0; + const char* buffer_end = &buffer[BUFFER_SIZE]; + char* data_start = buffer; + ssize_t bytes_read; + do { +#if CPUINFO_MOCK + bytes_read = cpuinfo_mock_read(file, data_start, (size_t) (buffer_end - data_start)); +#else + bytes_read = read(file, data_start, (size_t) (buffer_end - data_start)); +#endif + if (bytes_read < 0) { + cpuinfo_log_info("failed to read file %s at position %zu: %s", filename, position, strerror(errno)); + status = false; + goto cleanup; + } + + position += (size_t) bytes_read; + const char* data_end = data_start + (size_t) bytes_read; + const char* entry_start = buffer; + + if (bytes_read == 0) { + /* No more data in the file: process the remaining text in the buffer as a single entry */ + const char* entry_end = data_end; + const bool entry_status = parse_entry(entry_start, entry_end, callback, context); + status &= entry_status; + } else { + const char* entry_end; + do { + /* Find the end of the entry, as indicated by a comma (',') */ + for (entry_end = entry_start; entry_end != data_end; entry_end++) { + if (*entry_end == ',') { + break; + } + } + + /* + * If we located separator at the end of the entry, parse it. + * Otherwise, there may be more data at the end; read the file once again. + */ + if (entry_end != data_end) { + const bool entry_status = parse_entry(entry_start, entry_end, callback, context); + status &= entry_status; + entry_start = entry_end + 1; + } + } while (entry_end != data_end); + + /* Move remaining partial entry data at the end to the beginning of the buffer */ + const size_t entry_length = (size_t) (entry_end - entry_start); + memmove(buffer, entry_start, entry_length); + data_start = &buffer[entry_length]; + } + } while (bytes_read != 0); + +cleanup: + if (file != -1) { +#if CPUINFO_MOCK + cpuinfo_mock_close(file); +#else + close(file); +#endif + file = -1; + } + return status; +} diff --git a/dep/cpuinfo/src/linux/mockfile.c b/dep/cpuinfo/src/linux/mockfile.c new file mode 100644 index 000000000..138acfeb9 --- /dev/null +++ b/dep/cpuinfo/src/linux/mockfile.c @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#if !CPUINFO_MOCK + #error This file should be built only in mock mode +#endif + +#include +#include +#include +#include + + +static struct cpuinfo_mock_file* cpuinfo_mock_files = NULL; +static uint32_t cpuinfo_mock_file_count = 0; + + +void CPUINFO_ABI cpuinfo_mock_filesystem(struct cpuinfo_mock_file* files) { + cpuinfo_log_info("filesystem mocking enabled"); + uint32_t file_count = 0; + while (files[file_count].path != NULL) { + /* Indicate that file is not opened */ + files[file_count].offset = SIZE_MAX; + file_count += 1; + } + cpuinfo_mock_files = files; + cpuinfo_mock_file_count = file_count; +} + +int CPUINFO_ABI cpuinfo_mock_open(const char* path, int oflag) { + if (cpuinfo_mock_files == NULL) { + cpuinfo_log_warning("cpuinfo_mock_open called without mock filesystem; redirecting to open"); + return open(path, oflag); + } + + for (uint32_t i = 0; i < cpuinfo_mock_file_count; i++) { + if
(strcmp(cpuinfo_mock_files[i].path, path) == 0) { + if (oflag != O_RDONLY) { + errno = EACCES; + return -1; + } + if (cpuinfo_mock_files[i].offset != SIZE_MAX) { + errno = ENFILE; + return -1; + } + cpuinfo_mock_files[i].offset = 0; + return (int) i; + } + } + errno = ENOENT; + return -1; +} + +int CPUINFO_ABI cpuinfo_mock_close(int fd) { + if (cpuinfo_mock_files == NULL) { + cpuinfo_log_warning("cpuinfo_mock_close called without mock filesystem; redirecting to close"); + return close(fd); + } + + if ((unsigned int) fd >= cpuinfo_mock_file_count) { + errno = EBADF; + return -1; + } + if (cpuinfo_mock_files[fd].offset == SIZE_MAX) { + errno = EBADF; + return -1; + } + cpuinfo_mock_files[fd].offset = SIZE_MAX; + return 0; +} + +ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity) { + if (cpuinfo_mock_files == NULL) { + cpuinfo_log_warning("cpuinfo_mock_read called without mock filesystem; redirecting to read"); + return read(fd, buffer, capacity); + } + + if ((unsigned int) fd >= cpuinfo_mock_file_count) { + errno = EBADF; + return -1; + } + if (cpuinfo_mock_files[fd].offset == SIZE_MAX) { + errno = EBADF; + return -1; + } + + const size_t offset = cpuinfo_mock_files[fd].offset; + size_t count = cpuinfo_mock_files[fd].size - offset; + if (count > capacity) { + count = capacity; + } + memcpy(buffer, (void*) cpuinfo_mock_files[fd].content + offset, count); + cpuinfo_mock_files[fd].offset += count; + return (ssize_t) count; +} diff --git a/dep/cpuinfo/src/linux/multiline.c b/dep/cpuinfo/src/linux/multiline.c new file mode 100644 index 000000000..1feeb9b1c --- /dev/null +++ b/dep/cpuinfo/src/linux/multiline.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if CPUINFO_MOCK + #include +#endif +#include +#include + + +bool cpuinfo_linux_parse_multiline_file(const char* filename, size_t buffer_size, cpuinfo_line_callback callback, void* context) +{ + int file = -1; + bool status = false; + char* buffer = (char*) alloca(buffer_size); + +#if CPUINFO_MOCK + file = cpuinfo_mock_open(filename, O_RDONLY); +#else + file = open(filename, O_RDONLY); +#endif + if (file == -1) { + cpuinfo_log_info("failed to open %s: %s", filename, strerror(errno)); + goto cleanup; + } + + /* Only used for error reporting */ + size_t position = 0; + uint64_t line_number = 1; + const char* buffer_end = &buffer[buffer_size]; + char* data_start = buffer; + ssize_t bytes_read; + do { +#if CPUINFO_MOCK + bytes_read = cpuinfo_mock_read(file, data_start, (size_t) (buffer_end - data_start)); +#else + bytes_read = read(file, data_start, (size_t) (buffer_end - data_start)); +#endif + if (bytes_read < 0) { + cpuinfo_log_info("failed to read file %s at position %zu: %s", + filename, position, strerror(errno)); + goto cleanup; + } + + position += (size_t) bytes_read; + const char* data_end = data_start + (size_t) bytes_read; + const char* line_start = buffer; + + if (bytes_read == 0) { + /* No more data in the file: process the remaining text in the buffer as a single entry */ + const char* line_end = data_end; + if (!callback(line_start, line_end, context, line_number)) { + goto cleanup; + } + } else { + const char* line_end; + do { + /* Find the end of the entry, as indicated by newline character ('\n') */ + for (line_end = line_start; line_end != data_end; line_end++) { + if (*line_end == '\n') { + break; + } + } + + /* + * If we located separator at the end of the entry, parse it.
+ * Otherwise, there may be more data at the end; read the file once again. + */ + if (line_end != data_end) { + if (!callback(line_start, line_end, context, line_number++)) { + goto cleanup; + } + line_start = line_end + 1; + } + } while (line_end != data_end); + + /* Move remaining partial line data at the end to the beginning of the buffer */ + const size_t line_length = (size_t) (line_end - line_start); + memmove(buffer, line_start, line_length); + data_start = &buffer[line_length]; + } + } while (bytes_read != 0); + + /* Commit */ + status = true; + +cleanup: + if (file != -1) { +#if CPUINFO_MOCK + cpuinfo_mock_close(file); +#else + close(file); +#endif + file = -1; + } + return status; +} diff --git a/dep/cpuinfo/src/linux/processors.c b/dep/cpuinfo/src/linux/processors.c new file mode 100644 index 000000000..aedba7438 --- /dev/null +++ b/dep/cpuinfo/src/linux/processors.c @@ -0,0 +1,406 @@ +#include +#include +#include +#include +#include + +#if !defined(__ANDROID__) + /* + * sched.h is only used for CPU_SETSIZE constant. + * Android NDK headers before platform 21 do have this constant in sched.h + */ + #include +#endif + +#include +#include + + +#define STRINGIFY(token) #token + +#define KERNEL_MAX_FILENAME "/sys/devices/system/cpu/kernel_max" +#define KERNEL_MAX_FILESIZE 32 +#define FREQUENCY_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/cpufreq/cpuinfo_max_freq")) +#define MAX_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_max_freq" +#define MIN_FREQUENCY_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/cpufreq/cpuinfo_min_freq" +#define FREQUENCY_FILESIZE 32 +#define PACKAGE_ID_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/physical_package_id")) +#define PACKAGE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/physical_package_id" +#define PACKAGE_ID_FILESIZE 32 +#define CORE_ID_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_id")) +#define CORE_ID_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_id" +#define CORE_ID_FILESIZE 32 + +#define CORE_SIBLINGS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/core_siblings_list")) +#define CORE_SIBLINGS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_siblings_list" +#define THREAD_SIBLINGS_FILENAME_SIZE (sizeof("/sys/devices/system/cpu/cpu" STRINGIFY(UINT32_MAX) "/topology/thread_siblings_list")) +#define THREAD_SIBLINGS_FILENAME_FORMAT "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/thread_siblings_list" + +#define POSSIBLE_CPULIST_FILENAME "/sys/devices/system/cpu/possible" +#define PRESENT_CPULIST_FILENAME "/sys/devices/system/cpu/present" + + +inline static const char* parse_number(const char* start, const char* end, uint32_t number_ptr[restrict static 1]) { + uint32_t number = 0; + const char* parsed = start; + for (; parsed != end; parsed++) { + const uint32_t digit = (uint32_t) (uint8_t) (*parsed) - (uint32_t) '0'; + if (digit >= 10) { + break; + } + number = number * UINT32_C(10) + digit; + } + *number_ptr = number; + return parsed; +} + +/* Locale-independent */ +inline static bool is_whitespace(char c) { + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': + return true; + default: + return false; + } +} + +#if defined(__ANDROID__) && !defined(CPU_SETSIZE) + /* + * Android NDK headers before platform 21 do not define CPU_SETSIZE, + * so we hard-code its value, as 
defined in platform 21 headers + */ + #if defined(__LP64__) + static const uint32_t default_max_processors_count = 1024; + #else + static const uint32_t default_max_processors_count = 32; + #endif +#else + static const uint32_t default_max_processors_count = CPU_SETSIZE; +#endif + +static bool uint32_parser(const char* text_start, const char* text_end, void* context) { + if (text_start == text_end) { + cpuinfo_log_error("failed to parse file %s: file is empty", KERNEL_MAX_FILENAME); + return false; + } + + uint32_t kernel_max = 0; + const char* parsed_end = parse_number(text_start, text_end, &kernel_max); + if (parsed_end == text_start) { + cpuinfo_log_error("failed to parse file %s: \"%.*s\" is not an unsigned number", + KERNEL_MAX_FILENAME, (int) (text_end - text_start), text_start); + return false; + } else { + for (const char* char_ptr = parsed_end; char_ptr != text_end; char_ptr++) { + if (!is_whitespace(*char_ptr)) { + cpuinfo_log_warning("non-whitespace characters \"%.*s\" following number in file %s are ignored", + (int) (text_end - char_ptr), char_ptr, KERNEL_MAX_FILENAME); + break; + } + } + } + + uint32_t* kernel_max_ptr = (uint32_t*) context; + *kernel_max_ptr = kernel_max; + return true; +} + +uint32_t cpuinfo_linux_get_max_processors_count(void) { + uint32_t kernel_max; + if (cpuinfo_linux_parse_small_file(KERNEL_MAX_FILENAME, KERNEL_MAX_FILESIZE, uint32_parser, &kernel_max)) { + cpuinfo_log_debug("parsed kernel_max value of %"PRIu32" from %s", kernel_max, KERNEL_MAX_FILENAME); + + if (kernel_max >= default_max_processors_count) { + cpuinfo_log_warning("kernel_max value of %"PRIu32" parsed from %s exceeds platform-default limit %"PRIu32, + kernel_max, KERNEL_MAX_FILENAME, default_max_processors_count - 1); + } + + return kernel_max + 1; + } else { + cpuinfo_log_warning("using platform-default max processors count = %"PRIu32, default_max_processors_count); + return default_max_processors_count; + } +} + +uint32_t cpuinfo_linux_get_processor_max_frequency(uint32_t processor) { + char max_frequency_filename[FREQUENCY_FILENAME_SIZE]; + const int chars_formatted = snprintf( + max_frequency_filename, FREQUENCY_FILENAME_SIZE, MAX_FREQUENCY_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= FREQUENCY_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for max frequency of processor %"PRIu32, processor); + return 0; + } + + uint32_t max_frequency; + if (cpuinfo_linux_parse_small_file(max_frequency_filename, FREQUENCY_FILESIZE, uint32_parser, &max_frequency)) { + cpuinfo_log_debug("parsed max frequency value of %"PRIu32" KHz for logical processor %"PRIu32" from %s", + max_frequency, processor, max_frequency_filename); + return max_frequency; + } else { + cpuinfo_log_warning("failed to parse max frequency for processor %"PRIu32" from %s", + processor, max_frequency_filename); + return 0; + } +} + +uint32_t cpuinfo_linux_get_processor_min_frequency(uint32_t processor) { + char min_frequency_filename[FREQUENCY_FILENAME_SIZE]; + const int chars_formatted = snprintf( + min_frequency_filename, FREQUENCY_FILENAME_SIZE, MIN_FREQUENCY_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= FREQUENCY_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for min frequency of processor %"PRIu32, processor); + return 0; + } + + uint32_t min_frequency; + if (cpuinfo_linux_parse_small_file(min_frequency_filename, FREQUENCY_FILESIZE, uint32_parser, &min_frequency)) { + cpuinfo_log_debug("parsed min frequency value of %"PRIu32" KHz for logical 
processor %"PRIu32" from %s", + min_frequency, processor, min_frequency_filename); + return min_frequency; + } else { + /* + * This error is less severe than parsing max frequency, because min frequency is only useful for clustering, + * while max frequency is also needed for peak FLOPS calculation. + */ + cpuinfo_log_info("failed to parse min frequency for processor %"PRIu32" from %s", + processor, min_frequency_filename); + return 0; + } +} + +bool cpuinfo_linux_get_processor_core_id(uint32_t processor, uint32_t core_id_ptr[restrict static 1]) { + char core_id_filename[PACKAGE_ID_FILENAME_SIZE]; + const int chars_formatted = snprintf( + core_id_filename, CORE_ID_FILENAME_SIZE, CORE_ID_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= CORE_ID_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for core id of processor %"PRIu32, processor); + return 0; + } + + uint32_t core_id; + if (cpuinfo_linux_parse_small_file(core_id_filename, CORE_ID_FILESIZE, uint32_parser, &core_id)) { + cpuinfo_log_debug("parsed core id value of %"PRIu32" for logical processor %"PRIu32" from %s", + core_id, processor, core_id_filename); + *core_id_ptr = core_id; + return true; + } else { + cpuinfo_log_info("failed to parse core id for processor %"PRIu32" from %s", + processor, core_id_filename); + return false; + } +} + +bool cpuinfo_linux_get_processor_package_id(uint32_t processor, uint32_t package_id_ptr[restrict static 1]) { + char package_id_filename[PACKAGE_ID_FILENAME_SIZE]; + const int chars_formatted = snprintf( + package_id_filename, PACKAGE_ID_FILENAME_SIZE, PACKAGE_ID_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= PACKAGE_ID_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for package id of processor %"PRIu32, processor); + return 0; + } + + uint32_t package_id; + if (cpuinfo_linux_parse_small_file(package_id_filename, PACKAGE_ID_FILESIZE, uint32_parser, &package_id)) { + cpuinfo_log_debug("parsed package id value of %"PRIu32" for logical processor %"PRIu32" from %s", + package_id, processor, package_id_filename); + *package_id_ptr = package_id; + return true; + } else { + cpuinfo_log_info("failed to parse package id for processor %"PRIu32" from %s", + processor, package_id_filename); + return false; + } +} + +static bool max_processor_number_parser(uint32_t processor_list_start, uint32_t processor_list_end, void* context) { + uint32_t* processor_number_ptr = (uint32_t*) context; + const uint32_t processor_list_last = processor_list_end - 1; + if (*processor_number_ptr < processor_list_last) { + *processor_number_ptr = processor_list_last; + } + return true; +} + +uint32_t cpuinfo_linux_get_max_possible_processor(uint32_t max_processors_count) { + uint32_t max_possible_processor = 0; + if (!cpuinfo_linux_parse_cpulist(POSSIBLE_CPULIST_FILENAME, max_processor_number_parser, &max_possible_processor)) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + cpuinfo_log_error("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME); + #else + cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME); + #endif + return UINT32_MAX; + } + if (max_possible_processor >= max_processors_count) { + cpuinfo_log_warning( + "maximum possible processor number %"PRIu32" exceeds system limit %"PRIu32": truncating to the latter", + max_possible_processor, max_processors_count - 1); + max_possible_processor = max_processors_count - 1; + } + return max_possible_processor; +} + +uint32_t 
cpuinfo_linux_get_max_present_processor(uint32_t max_processors_count) { + uint32_t max_present_processor = 0; + if (!cpuinfo_linux_parse_cpulist(PRESENT_CPULIST_FILENAME, max_processor_number_parser, &max_present_processor)) { + #if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64 + cpuinfo_log_error("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME); + #else + cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME); + #endif + return UINT32_MAX; + } + if (max_present_processor >= max_processors_count) { + cpuinfo_log_warning( + "maximum present processor number %"PRIu32" exceeds system limit %"PRIu32": truncating to the latter", + max_present_processor, max_processors_count - 1); + max_present_processor = max_processors_count - 1; + } + return max_present_processor; +} + +struct detect_processors_context { + uint32_t max_processors_count; + uint32_t* processor0_flags; + uint32_t processor_struct_size; + uint32_t detected_flag; +}; + +static bool detect_processor_parser(uint32_t processor_list_start, uint32_t processor_list_end, void* context) { + const uint32_t max_processors_count = ((struct detect_processors_context*) context)->max_processors_count; + const uint32_t* processor0_flags = ((struct detect_processors_context*) context)->processor0_flags; + const uint32_t processor_struct_size = ((struct detect_processors_context*) context)->processor_struct_size; + const uint32_t detected_flag = ((struct detect_processors_context*) context)->detected_flag; + + for (uint32_t processor = processor_list_start; processor < processor_list_end; processor++) { + if (processor >= max_processors_count) { + break; + } + *((uint32_t*) ((uintptr_t) processor0_flags + processor_struct_size * processor)) |= detected_flag; + } + return true; +} + +bool cpuinfo_linux_detect_possible_processors(uint32_t max_processors_count, + uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t possible_flag) +{ + struct detect_processors_context context = { + .max_processors_count = max_processors_count, + .processor0_flags = processor0_flags, + .processor_struct_size = processor_struct_size, + .detected_flag = possible_flag, + }; + if (cpuinfo_linux_parse_cpulist(POSSIBLE_CPULIST_FILENAME, detect_processor_parser, &context)) { + return true; + } else { + cpuinfo_log_warning("failed to parse the list of possible processors in %s", POSSIBLE_CPULIST_FILENAME); + return false; + } +} + +bool cpuinfo_linux_detect_present_processors(uint32_t max_processors_count, + uint32_t* processor0_flags, uint32_t processor_struct_size, uint32_t present_flag) +{ + struct detect_processors_context context = { + .max_processors_count = max_processors_count, + .processor0_flags = processor0_flags, + .processor_struct_size = processor_struct_size, + .detected_flag = present_flag, + }; + if (cpuinfo_linux_parse_cpulist(PRESENT_CPULIST_FILENAME, detect_processor_parser, &context)) { + return true; + } else { + cpuinfo_log_warning("failed to parse the list of present processors in %s", PRESENT_CPULIST_FILENAME); + return false; + } +} + +struct siblings_context { + const char* group_name; + uint32_t max_processors_count; + uint32_t processor; + cpuinfo_siblings_callback callback; + void* callback_context; +}; + +static bool siblings_parser(uint32_t sibling_list_start, uint32_t sibling_list_end, struct siblings_context* context) { + const char* group_name = context->group_name; + const uint32_t max_processors_count = context->max_processors_count; + const uint32_t 
processor = context->processor; + + if (sibling_list_end > max_processors_count) { + cpuinfo_log_warning("ignore %s siblings %"PRIu32"-%"PRIu32" of processor %"PRIu32, + group_name, max_processors_count, sibling_list_end - 1, processor); + sibling_list_end = max_processors_count; + } + + return context->callback(processor, sibling_list_start, sibling_list_end, context->callback_context); +} + +bool cpuinfo_linux_detect_core_siblings( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context) +{ + char core_siblings_filename[CORE_SIBLINGS_FILENAME_SIZE]; + const int chars_formatted = snprintf( + core_siblings_filename, CORE_SIBLINGS_FILENAME_SIZE, CORE_SIBLINGS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= CORE_SIBLINGS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for core siblings of processor %"PRIu32, processor); + return false; + } + + struct siblings_context siblings_context = { + .group_name = "package", + .max_processors_count = max_processors_count, + .processor = processor, + .callback = callback, + .callback_context = context, + }; + if (cpuinfo_linux_parse_cpulist(core_siblings_filename, + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) + { + return true; + } else { + cpuinfo_log_info("failed to parse the list of core siblings for processor %"PRIu32" from %s", + processor, core_siblings_filename); + return false; + } +} + +bool cpuinfo_linux_detect_thread_siblings( + uint32_t max_processors_count, + uint32_t processor, + cpuinfo_siblings_callback callback, + void* context) +{ + char thread_siblings_filename[THREAD_SIBLINGS_FILENAME_SIZE]; + const int chars_formatted = snprintf( + thread_siblings_filename, THREAD_SIBLINGS_FILENAME_SIZE, THREAD_SIBLINGS_FILENAME_FORMAT, processor); + if ((unsigned int) chars_formatted >= THREAD_SIBLINGS_FILENAME_SIZE) { + cpuinfo_log_warning("failed to format filename for thread siblings of processor %"PRIu32, processor); + return false; + } + + struct siblings_context siblings_context = { + .group_name = "core", + .max_processors_count = max_processors_count, + .processor = processor, + .callback = callback, + .callback_context = context, + }; + if (cpuinfo_linux_parse_cpulist(thread_siblings_filename, + (cpuinfo_cpulist_callback) siblings_parser, &siblings_context)) + { + return true; + } else { + cpuinfo_log_info("failed to parse the list of thread siblings for processor %"PRIu32" from %s", + processor, thread_siblings_filename); + return false; + } +} + diff --git a/dep/cpuinfo/src/linux/smallfile.c b/dep/cpuinfo/src/linux/smallfile.c new file mode 100644 index 000000000..98cde00e5 --- /dev/null +++ b/dep/cpuinfo/src/linux/smallfile.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if CPUINFO_MOCK + #include +#endif +#include +#include + + +bool cpuinfo_linux_parse_small_file(const char* filename, size_t buffer_size, cpuinfo_smallfile_callback callback, void* context) { + int file = -1; + bool status = false; + char* buffer = (char*) alloca(buffer_size); + + #if CPUINFO_LOG_DEBUG_PARSERS + cpuinfo_log_debug("parsing small file %s", filename); + #endif + +#if CPUINFO_MOCK + file = cpuinfo_mock_open(filename, O_RDONLY); +#else + file = open(filename, O_RDONLY); +#endif + if (file == -1) { + cpuinfo_log_info("failed to open %s: %s", filename, strerror(errno)); + goto cleanup; + } + + size_t buffer_position = 0; + ssize_t bytes_read; + do { +#if CPUINFO_MOCK + 
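/* editor note (grounded in this patch): in CPUINFO_MOCK builds, reads come from the in-memory filesystem registered via cpuinfo_mock_filesystem() instead of the real file */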
bytes_read = cpuinfo_mock_read(file, &buffer[buffer_position], buffer_size - buffer_position); +#else + bytes_read = read(file, &buffer[buffer_position], buffer_size - buffer_position); +#endif + if (bytes_read < 0) { + cpuinfo_log_info("failed to read file %s at position %zu: %s", filename, buffer_position, strerror(errno)); + goto cleanup; + } + buffer_position += (size_t) bytes_read; + if (buffer_position >= buffer_size) { + cpuinfo_log_error("failed to read file %s: insufficient buffer of size %zu", filename, buffer_size); + goto cleanup; + } + } while (bytes_read != 0); + + status = callback(buffer, &buffer[buffer_position], context); + +cleanup: + if (file != -1) { +#if CPUINFO_MOCK + cpuinfo_mock_close(file); +#else + close(file); +#endif + file = -1; + } + return status; +} diff --git a/dep/cpuinfo/src/mach/api.h b/dep/cpuinfo/src/mach/api.h new file mode 100644 index 000000000..fdef5bdff --- /dev/null +++ b/dep/cpuinfo/src/mach/api.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +#define CPUINFO_MACH_MAX_CACHE_LEVELS 8 + + +struct cpuinfo_mach_topology { + uint32_t packages; + uint32_t cores; + uint32_t threads; + uint32_t threads_per_cache[CPUINFO_MACH_MAX_CACHE_LEVELS]; +}; + + +struct cpuinfo_mach_topology cpuinfo_mach_detect_topology(void); diff --git a/dep/cpuinfo/src/mach/topology.c b/dep/cpuinfo/src/mach/topology.c new file mode 100644 index 000000000..b56343bb2 --- /dev/null +++ b/dep/cpuinfo/src/mach/topology.c @@ -0,0 +1,73 @@ +#include +#include +#include + +#include +#include + +#include +#include + +#include + + +struct cpuinfo_mach_topology cpuinfo_mach_detect_topology(void) { + int cores = 1; + size_t sizeof_cores = sizeof(cores); + if (sysctlbyname("hw.physicalcpu_max", &cores, &sizeof_cores, NULL, 0) != 0) { + cpuinfo_log_error("sysctlbyname(\"hw.physicalcpu_max\") failed: %s", strerror(errno)); + } else if (cores <= 0) { + cpuinfo_log_error("sysctlbyname(\"hw.physicalcpu_max\") returned invalid value %d", cores); + cores = 1; + } + + int threads = 1; + size_t sizeof_threads = sizeof(threads); + if (sysctlbyname("hw.logicalcpu_max", &threads, &sizeof_threads, NULL, 0) != 0) { + cpuinfo_log_error("sysctlbyname(\"hw.logicalcpu_max\") failed: %s", strerror(errno)); + } else if (threads <= 0) { + cpuinfo_log_error("sysctlbyname(\"hw.logicalcpu_max\") returned invalid value %d", threads); + threads = cores; + } + + int packages = 1; +#if !TARGET_OS_IPHONE + size_t sizeof_packages = sizeof(packages); + if (sysctlbyname("hw.packages", &packages, &sizeof_packages, NULL, 0) != 0) { + cpuinfo_log_error("sysctlbyname(\"hw.packages\") failed: %s", strerror(errno)); + } else if (packages <= 0) { + cpuinfo_log_error("sysctlbyname(\"hw.packages\") returned invalid value %d", packages); + packages = 1; + } +#endif + + cpuinfo_log_debug("mach topology: packages = %d, cores = %d, threads = %d", packages, (int) cores, (int) threads); + struct cpuinfo_mach_topology topology = { + .packages = (uint32_t) packages, + .cores = (uint32_t) cores, + .threads = (uint32_t) threads + }; + +#if !TARGET_OS_IPHONE + size_t cacheconfig_size = 0; + if (sysctlbyname("hw.cacheconfig", NULL, &cacheconfig_size, NULL, 0) != 0) { + cpuinfo_log_error("sysctlbyname(\"hw.cacheconfig\") failed: %s", strerror(errno)); + } else { + uint64_t* cacheconfig = alloca(cacheconfig_size); + if (sysctlbyname("hw.cacheconfig", cacheconfig, &cacheconfig_size, NULL, 0) != 0) { + cpuinfo_log_error("sysctlbyname(\"hw.cacheconfig\") failed: %s", strerror(errno)); + } else { + size_t cache_configs = cacheconfig_size / 
sizeof(uint64_t); + cpuinfo_log_debug("mach hw.cacheconfig count: %zu", cache_configs); + if (cache_configs > CPUINFO_MACH_MAX_CACHE_LEVELS) { + cache_configs = CPUINFO_MACH_MAX_CACHE_LEVELS; + } + for (size_t i = 0; i < cache_configs; i++) { + cpuinfo_log_debug("mach hw.cacheconfig[%zu]: %"PRIu64, i, cacheconfig[i]); + topology.threads_per_cache[i] = cacheconfig[i]; + } + } + } +#endif + return topology; +} diff --git a/dep/cpuinfo/src/x86/api.h b/dep/cpuinfo/src/x86/api.h new file mode 100644 index 000000000..213c2d881 --- /dev/null +++ b/dep/cpuinfo/src/x86/api.h @@ -0,0 +1,159 @@ +#pragma once + +#include +#include + +#include +#include + + +struct cpuid_regs { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; +}; + +struct cpuinfo_x86_cache { + uint32_t size; + uint32_t associativity; + uint32_t sets; + uint32_t partitions; + uint32_t line_size; + uint32_t flags; + uint32_t apic_bits; +}; + +struct cpuinfo_x86_caches { + struct cpuinfo_trace_cache trace; + struct cpuinfo_x86_cache l1i; + struct cpuinfo_x86_cache l1d; + struct cpuinfo_x86_cache l2; + struct cpuinfo_x86_cache l3; + struct cpuinfo_x86_cache l4; + uint32_t prefetch_size; +}; + +struct cpuinfo_x86_model_info { + uint32_t model; + uint32_t family; + + uint32_t base_model; + uint32_t base_family; + uint32_t stepping; + uint32_t extended_model; + uint32_t extended_family; + uint32_t processor_type; +}; + +struct cpuinfo_x86_topology { + uint32_t apic_id; + uint32_t thread_bits_offset; + uint32_t thread_bits_length; + uint32_t core_bits_offset; + uint32_t core_bits_length; +}; + +struct cpuinfo_x86_processor { + uint32_t cpuid; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; +#ifdef __linux__ + int linux_id; +#endif + struct cpuinfo_x86_caches cache; + struct { + struct cpuinfo_tlb itlb_4KB; + struct cpuinfo_tlb itlb_2MB; + struct cpuinfo_tlb itlb_4MB; + struct cpuinfo_tlb dtlb0_4KB; + struct cpuinfo_tlb dtlb0_2MB; + struct cpuinfo_tlb dtlb0_4MB; + struct cpuinfo_tlb dtlb_4KB; + struct cpuinfo_tlb dtlb_2MB; + struct cpuinfo_tlb dtlb_4MB; + struct cpuinfo_tlb dtlb_1GB; + struct cpuinfo_tlb stlb2_4KB; + struct cpuinfo_tlb stlb2_2MB; + struct cpuinfo_tlb stlb2_1GB; + } tlb; + struct cpuinfo_x86_topology topology; + char brand_string[CPUINFO_PACKAGE_NAME_MAX]; +}; + +CPUINFO_INTERNAL void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor); + +CPUINFO_INTERNAL enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx); +CPUINFO_INTERNAL struct cpuinfo_x86_model_info cpuinfo_x86_decode_model_info(uint32_t eax); +CPUINFO_INTERNAL enum cpuinfo_uarch cpuinfo_x86_decode_uarch( + enum cpuinfo_vendor vendor, + const struct cpuinfo_x86_model_info* model_info); + +CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( + const struct cpuid_regs basic_info, const struct cpuid_regs extended_info, + uint32_t max_base_index, uint32_t max_extended_index, + enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch); + +CPUINFO_INTERNAL void cpuinfo_x86_detect_topology( + uint32_t max_base_index, + uint32_t max_extended_index, + struct cpuid_regs leaf1, + struct cpuinfo_x86_topology* topology); + +CPUINFO_INTERNAL void cpuinfo_x86_detect_cache( + uint32_t max_base_index, uint32_t max_extended_index, + bool amd_topology_extensions, + enum cpuinfo_vendor vendor, + const struct cpuinfo_x86_model_info* model_info, + struct cpuinfo_x86_caches* cache, + struct cpuinfo_tlb* itlb_4KB, + struct cpuinfo_tlb* itlb_2MB, + struct cpuinfo_tlb* itlb_4MB, + struct cpuinfo_tlb* dtlb0_4KB, + 
struct cpuinfo_tlb* dtlb0_2MB, + struct cpuinfo_tlb* dtlb0_4MB, + struct cpuinfo_tlb* dtlb_4KB, + struct cpuinfo_tlb* dtlb_2MB, + struct cpuinfo_tlb* dtlb_4MB, + struct cpuinfo_tlb* dtlb_1GB, + struct cpuinfo_tlb* stlb2_4KB, + struct cpuinfo_tlb* stlb2_2MB, + struct cpuinfo_tlb* stlb2_1GB, + uint32_t* log2_package_cores_max); + +CPUINFO_INTERNAL void cpuinfo_x86_decode_cache_descriptor( + uint8_t descriptor, enum cpuinfo_vendor vendor, + const struct cpuinfo_x86_model_info* model_info, + struct cpuinfo_x86_caches* cache, + struct cpuinfo_tlb* itlb_4KB, + struct cpuinfo_tlb* itlb_2MB, + struct cpuinfo_tlb* itlb_4MB, + struct cpuinfo_tlb* dtlb0_4KB, + struct cpuinfo_tlb* dtlb0_2MB, + struct cpuinfo_tlb* dtlb0_4MB, + struct cpuinfo_tlb* dtlb_4KB, + struct cpuinfo_tlb* dtlb_2MB, + struct cpuinfo_tlb* dtlb_4MB, + struct cpuinfo_tlb* dtlb_1GB, + struct cpuinfo_tlb* stlb2_4KB, + struct cpuinfo_tlb* stlb2_2MB, + struct cpuinfo_tlb* stlb2_1GB, + uint32_t* prefetch_size); + +CPUINFO_INTERNAL bool cpuinfo_x86_decode_deterministic_cache_parameters( + struct cpuid_regs regs, + struct cpuinfo_x86_caches* cache, + uint32_t* package_cores_max); + +CPUINFO_INTERNAL bool cpuinfo_x86_decode_cache_properties( + struct cpuid_regs regs, + struct cpuinfo_x86_caches* cache); + +CPUINFO_INTERNAL uint32_t cpuinfo_x86_normalize_brand_string( + const char raw_name[48], + char normalized_name[48]); + +CPUINFO_INTERNAL uint32_t cpuinfo_x86_format_package_name( + enum cpuinfo_vendor vendor, + const char normalized_brand_string[48], + char package_name[CPUINFO_PACKAGE_NAME_MAX]); diff --git a/dep/cpuinfo/src/x86/cache/descriptor.c b/dep/cpuinfo/src/x86/cache/descriptor.c new file mode 100644 index 000000000..69d38cc39 --- /dev/null +++ b/dep/cpuinfo/src/x86/cache/descriptor.c @@ -0,0 +1,1726 @@ +#include + +#include +#include + + +void cpuinfo_x86_decode_cache_descriptor( + uint8_t descriptor, enum cpuinfo_vendor vendor, + const struct cpuinfo_x86_model_info* model_info, + struct cpuinfo_x86_caches* cache, + struct cpuinfo_tlb* itlb_4KB, + struct cpuinfo_tlb* itlb_2MB, + struct cpuinfo_tlb* itlb_4MB, + struct cpuinfo_tlb* dtlb0_4KB, + struct cpuinfo_tlb* dtlb0_2MB, + struct cpuinfo_tlb* dtlb0_4MB, + struct cpuinfo_tlb* dtlb_4KB, + struct cpuinfo_tlb* dtlb_2MB, + struct cpuinfo_tlb* dtlb_4MB, + struct cpuinfo_tlb* dtlb_1GB, + struct cpuinfo_tlb* stlb2_4KB, + struct cpuinfo_tlb* stlb2_2MB, + struct cpuinfo_tlb* stlb2_1GB, + uint32_t* prefetch_size) +{ + /* + * Descriptors are parsed according to: + * - Application Note 485: Intel Processor Identification and CPUID Instruction, May 2012, Order Number 241618-039 + * - Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 2 (2A, 2B, 2C & 2D): Instruction Set + * Reference, A-Z, December 2016.
Order Number: 325383-061US + * - Cyrix CPU Detection Guide, Preliminary Revision 1.01 + * - Geode(TM) GX1 Processor Series: Low Power Integrated x86 Solution + */ + switch (descriptor) { + case 0x01: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte pages, 4-way set associative, 32 entries" + * Application Note 485: + * "Instruction TLB: 4-KB Pages, 4-way set associative, 32 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x02: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 MByte pages, fully associative, 2 entries" + * Application Note 485: + * "Instruction TLB: 4-MB Pages, fully associative, 2 entries" + */ + *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 2, + .associativity = 2, + .pages = CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x03: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte pages, 4-way set associative, 64 entries" + * Application Note 485: + * "Data TLB: 4-KB Pages, 4-way set associative, 64 entries" + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 64, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x04: + /* + * Intel ISA Reference: + * "Data TLB: 4 MByte pages, 4-way set associative, 8 entries" + * Application Note 485: + * "Data TLB: 4-MB Pages, 4-way set associative, 8 entries" + */ + *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 8, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x05: + /* + * Intel ISA Reference: + * "Data TLB1: 4 MByte pages, 4-way set associative, 32 entries" + * Application Note 485: + * "Data TLB: 4-MB Pages, 4-way set associative, 32 entries" + */ + *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x06: + /* + * Intel ISA Reference: + * "1st-level instruction cache: 8 KBytes, 4-way set associative, 32 byte line size" + * Application Note 485: + * "1st-level instruction cache: 8-KB, 4-way set associative, 32-byte line size" + */ + cache->l1i = (struct cpuinfo_x86_cache) { + .size = 8 * 1024, + .associativity = 4, + .sets = 64, + .partitions = 1, + .line_size = 32, + }; + break; + case 0x08: + /* + * Intel ISA Reference: + * "1st-level instruction cache: 16 KBytes, 4-way set associative, 32 byte line size" + * Application Note 485: + * "1st-level instruction cache: 16-KB, 4-way set associative, 32-byte line size" + */ + cache->l1i = (struct cpuinfo_x86_cache) { + .size = 16 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 32, + }; + break; + case 0x09: + /* + * Intel ISA Reference: + * "1st-level instruction cache: 32KBytes, 4-way set associative, 64 byte line size" + * Application Note 485: + * "1st-level Instruction Cache: 32-KB, 4-way set associative, 64-byte line size" + */ + cache->l1i = (struct cpuinfo_x86_cache) { + .size = 32 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x0A: + /* + * Intel ISA Reference: + * "1st-level data cache: 8 KBytes, 2-way set associative, 32 byte line size" + * Application Note 485: + * "1st-level data cache: 8-KB, 2-way set associative, 32-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 8 * 1024, + .associativity = 2, + .sets = 128, + .partitions = 1, + .line_size = 32, + }; + break; + case 0x0B: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries" + * Application Note 485: + * "Instruction 
TLB: 4-MB pages, 4-way set associative, 4 entries" + */ + *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 4, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x0C: + /* + * Intel ISA Reference: + * "1st-level data cache: 16 KBytes, 4-way set associative, 32 byte line size" + * Application Note 485: + * "1st-level data cache: 16-KB, 4-way set associative, 32-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 16 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 32, + }; + break; + case 0x0D: + /* + * Intel ISA Reference: + * "1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size" + * Application Note 485: + * "1st-level Data Cache: 16-KB, 4-way set associative, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 16 * 1024, + .associativity = 4, + .sets = 64, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x0E: + /* + * Intel ISA Reference: + * "1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size" + * Application Note 485: + * "1st-level Data Cache: 24-KB, 6-way set associative, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 24 * 1024, + .associativity = 6, + .sets = 64, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x1D: + /* + * Intel ISA Reference: + * "2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 128 * 1024, + .associativity = 2, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x21: + /* + * Intel ISA Reference: + * "2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size" + * Application Note 485: + * "2nd-level cache: 256-KB, 8-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 512, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x22: + /* + * Intel ISA Reference: + * "3rd-level cache: 512 KBytes, 4-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "3rd-level cache: 512-KB, 4-way set associative, sectored cache, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 4, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x23: + /* + * Intel ISA Reference: + * "3rd-level cache: 1 MBytes, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "3rd-level cache: 1-MB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x24: + /* + * Intel ISA Reference: + * "2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 16, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x25: + /* + * Intel ISA Reference: + * "3rd-level cache: 2 MBytes, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "3rd-level cache: 2-MB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) {
+ .size = 2 * 1024 * 1024, + .associativity = 8, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x29: + /* + * Intel ISA Reference: + * "3rd-level cache: 4 MBytes, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "3rd-level cache: 4-MB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 4 * 1024 * 1024, + .associativity = 8, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x2C: + /* + * Intel ISA Reference: + * "1st-level data cache: 32 KBytes, 8-way set associative, 64 byte line size" + * Application Note 485: + * "1st-level data cache: 32-KB, 8-way set associative, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 32 * 1024, + .associativity = 8, + .sets = 64, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x30: + /* + * Intel ISA Reference: + * "1st-level instruction cache: 32 KBytes, 8-way set associative, 64 byte line size" + * Application Note 485: + * "1st-level instruction cache: 32-KB, 8-way set associative, 64-byte line size" + */ + cache->l1i = (struct cpuinfo_x86_cache) { + .size = 32 * 1024, + .associativity = 8, + .sets = 64, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x39: + /* Where does this come from? */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 128 * 1024, + .associativity = 4, + .sets = 512, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x3A: + /* Where does this come from? */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 192 * 1024, + .associativity = 6, + .sets = 512, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x3B: + /* Where does this come from? */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 128 * 1024, + .associativity = 2, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x3C: + /* Where does this come from? */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 256 * 1024, + .associativity = 4, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x3D: + /* Where does this come from? */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 384 * 1024, + .associativity = 6, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x3E: + /* Where does this come from? 
*/ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 4, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x40: + /* + * Intel ISA Reference: + * "No 2nd-level cache or, if processor contains a valid 2nd-level cache, no 3rd-level cache" + * Application Note 485: + * "No 2nd-level cache or, if processor contains a valid 2nd-level cache, no 3rd-level cache" + */ + break; + case 0x41: + /* + * Intel ISA Reference: + * "2nd-level cache: 128 KBytes, 4-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 128-KB, 4-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 128 * 1024, + .associativity = 4, + .sets = 1024, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x42: + /* + * Intel ISA Reference: + * "2nd-level cache: 256 KBytes, 4-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 256-KB, 4-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 256 * 1024, + .associativity = 4, + .sets = 2048, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x43: + /* + * Intel ISA Reference: + * "2nd-level cache: 512 KBytes, 4-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 512-KB, 4-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 4, + .sets = 4096, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x44: + /* + * Intel ISA Reference: + * "2nd-level cache: 1 MByte, 4-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 1-MB, 4-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 4, + .sets = 8192, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x45: + /* + * Intel ISA Reference: + * "2nd-level cache: 2 MByte, 4-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 2-MB, 4-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 2 * 1024 * 1024, + .associativity = 4, + .sets = 16384, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x46: + /* + * Intel ISA Reference: + * "3rd-level cache: 4 MByte, 4-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 4-MB, 4-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 4 * 1024 * 1024, + .associativity = 4, + .sets = 16384, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x47: + /* + * Intel ISA Reference: + * "3rd-level cache: 8 MByte, 8-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 8-MB, 8-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 8 * 1024 * 1024, + .associativity = 8, + .sets = 16384, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x48: + /* + * Intel ISA Reference: + * "2nd-level cache: 3MByte, 12-way set associative, 64 byte line size" + * Application Note 485: + * "2nd-level cache: 3-MB, 12-way set associative, 
64-byte line size, unified on-die" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 3 * 1024 * 1024, + .associativity = 12, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x49: + /* + * Intel ISA Reference: + * "3rd-level cache: 4MB, 16-way set associative, 64-byte line size (Intel Xeon processor MP, + * Family 0FH, Model 06H); 2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 4-MB, 16-way set associative, 64-byte line size (Intel Xeon processor MP, + * Family 0Fh, Model 06h) + * 2nd-level cache: 4-MB, 16-way set associative, 64-byte line size" + */ + if ((vendor == cpuinfo_vendor_intel) && (model_info->model == 0x06) && (model_info->family == 0x0F)) { + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 4 * 1024 * 1024, + .associativity = 16, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + } else { + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 4 * 1024 * 1024, + .associativity = 16, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + } + break; + case 0x4A: + /* + * Intel ISA Reference: + * "3rd-level cache: 6MByte, 12-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 6-MB, 12-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 6 * 1024 * 1024, + .associativity = 12, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x4B: + /* + * Intel ISA Reference: + * "3rd-level cache: 8MByte, 16-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 8-MB, 16-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 8 * 1024 * 1024, + .associativity = 16, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x4C: + /* + * Intel ISA Reference: + * "3rd-level cache: 12MByte, 12-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 12-MB, 12-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 12 * 1024 * 1024, + .associativity = 12, + .sets = 16384, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x4D: + /* + * Intel ISA Reference: + * "3rd-level cache: 16MByte, 16-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 16-MB, 16-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 16 * 1024 * 1024, + .associativity = 16, + .sets = 16384, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x4E: + /* + * Intel ISA Reference: + * "2nd-level cache: 6MByte, 24-way set associative, 64 byte line size" + * Application Note 485: + * "2nd-level cache: 6-MB, 24-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 6 * 1024 * 1024, + .associativity = 24, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x4F: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte pages, 32 entries" + * Application Note 485: + * "Instruction TLB: 4-KB pages, 32 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 32, + /* Assume full associativity from nearby 
entries: manual lacks detail */ + .associativity = 32, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x50: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte and 2-MByte or 4-MByte pages, 64 entries" + * Application Note 485: + * "Instruction TLB: 4-KB, 2-MB or 4-MB pages, fully associative, 64 entries" + */ + *itlb_4KB = *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 64, + .associativity = 64, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x51: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte and 2-MByte or 4-MByte pages, 128 entries" + * Application Note 485: + * "Instruction TLB: 4-KB, 2-MB or 4-MB pages, fully associative, 128 entries" + */ + *itlb_4KB = *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 128, + .associativity = 128, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x52: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte and 2-MByte or 4-MByte pages, 256 entries" + * Application Note 485: + * "Instruction TLB: 4-KB, 2-MB or 4-MB pages, fully associative, 256 entries" + */ + *itlb_4KB = *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 256, + .associativity = 256, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x55: + /* + * Intel ISA Reference: + * "Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries" + * Application Note 485: + * "Instruction TLB: 2-MB or 4-MB pages, fully associative, 7 entries" + */ + *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 7, + .associativity = 7, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x56: + /* + * Intel ISA Reference: + * "Data TLB0: 4 MByte pages, 4-way set associative, 16 entries" + * Application Note 485: + * "L1 Data TLB: 4-MB pages, 4-way set associative, 16 entries" + */ + *dtlb0_4MB = (struct cpuinfo_tlb) { + .entries = 16, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x57: + /* + * Intel ISA Reference: + * "Data TLB0: 4 KByte pages, 4-way associative, 16 entries" + * Application Note 485: + * "L1 Data TLB: 4-KB pages, 4-way set associative, 16 entries" + */ + *dtlb0_4KB = (struct cpuinfo_tlb) { + .entries = 16, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x59: + /* + * Intel ISA Reference: + * "Data TLB0: 4 KByte pages, fully associative, 16 entries" + * Application Note 485: + * "Data TLB0: 4-KB pages, fully associative, 16 entries" + */ + *dtlb0_4KB = (struct cpuinfo_tlb) { + .entries = 16, + .associativity = 16, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x5A: + /* + * Intel ISA Reference: + * "Data TLB0: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries" + * Application Note 485: + * "Data TLB0: 2-MB or 4-MB pages, 4-way associative, 32 entries" + */ + *dtlb0_2MB = *dtlb0_4MB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x5B: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte and 4 MByte pages, 64 entries" + * Application Note 485: + * "Data TLB: 4-KB or 4-MB pages, fully associative, 64 entries" + */ + *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 64, + .associativity = 64, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x5C: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte and 4 MByte 
pages, 128 entries" + * Application Note 485: + * "Data TLB: 4-KB or 4-MB pages, fully associative, 128 entries" + */ + *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 128, + .associativity = 128, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x5D: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte and 4 MByte pages, 256 entries" + * Application Note 485: + * "Data TLB: 4-KB or 4-MB pages, fully associative, 256 entries" + */ + *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 256, + .associativity = 256, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x60: + /* + * Application Note 485: + * "1st-level data cache: 16-KB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 16 * 1024, + .associativity = 8, + .sets = 32, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x61: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte pages, fully associative, 48 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 48, + .associativity = 48, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x63: + /* + * Intel ISA Reference: + * "Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and + * a separate array with 1 GByte pages, 4-way set associative, 4 entries" + */ + *dtlb_2MB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + *dtlb_1GB = (struct cpuinfo_tlb) { + .entries = 4, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_1GB, + }; + break; + case 0x64: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte pages, 4-way set associative, 512 entries" + * + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 512, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x66: + /* + * Application Note 485: + * "1st-level data cache: 8-KB, 4-way set associative, sectored cache, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 8 * 1024, + .associativity = 4, + .sets = 32, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x67: + /* + * Application Note 485: + * "1st-level data cache: 16-KB, 4-way set associative, sectored cache, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 16 * 1024, + .associativity = 4, + .sets = 64, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x68: + /* + * Application Note 485: + * "1st-level data cache: 32-KB, 4 way set associative, sectored cache, 64-byte line size" + */ + cache->l1d = (struct cpuinfo_x86_cache) { + .size = 32 * 1024, + .associativity = 4, + .sets = 128, + .partitions = 1, + .line_size = 64, + }; + break; + case 0x6A: + /* + * Intel ISA Reference: + * "uTLB: 4 KByte pages, 8-way set associative, 64 entries" + */ + + /* uTLB is, in fact, a normal 1-level DTLB on Silvermont & Knights Landing */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 64, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x6B: + /* + * Intel ISA Reference: + * "DTLB: 4 KByte pages, 8-way set associative, 256 entries" + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 256, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0x6C: + /* + * Intel ISA Reference: + * "DTLB: 2M/4M pages, 8-way set associative, 128 entries" + */ + *dtlb_2MB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 128, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_2MB
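+ /* note: a single TLB array serves both page sizes here, so the same entry is stored for 2 MB and 4 MB pages and the page mask combines both size flags */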
| CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x6D: + /* + * Intel ISA Reference: + * "DTLB: 1 GByte pages, fully associative, 16 entries" + */ + *dtlb_1GB = (struct cpuinfo_tlb) { + .entries = 16, + .associativity = 16, + .pages = CPUINFO_PAGE_SIZE_1GB, + }; + break; + case 0x70: + /* + * Intel ISA Reference: + * "Trace cache: 12 K-uop, 8-way set associative" + * Application Note 485: + * "Trace cache: 12K-uops, 8-way set associative" + * Cyrix CPU Detection Guide and Geode GX1 Processor Series: + * "TLB, 32 entries, 4-way set associative, 4K-Byte Pages" + */ + switch (vendor) { +#if CPUINFO_ARCH_X86 + case cpuinfo_vendor_cyrix: + case cpuinfo_vendor_nsc: + *dtlb_4KB = *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; +#endif /* CPUINFO_ARCH_X86 */ + default: + cache->trace = (struct cpuinfo_trace_cache) { + .uops = 12 * 1024, + .associativity = 8, + }; + } + break; + case 0x71: + /* + * Intel ISA Reference: + * "Trace cache: 16 K-uop, 8-way set associative" + * Application Note 485: + * "Trace cache: 16K-uops, 8-way set associative" + */ + cache->trace = (struct cpuinfo_trace_cache) { + .uops = 16 * 1024, + .associativity = 8, + }; + break; + case 0x72: + /* + * Intel ISA Reference: + * "Trace cache: 32 K-μop, 8-way set associative" + * Application Note 485: + * "Trace cache: 32K-uops, 8-way set associative" + */ + cache->trace = (struct cpuinfo_trace_cache) { + .uops = 32 * 1024, + .associativity = 8, + }; + break; + case 0x73: + /* Where does this come from? */ + cache->trace = (struct cpuinfo_trace_cache) { + .uops = 64 * 1024, + .associativity = 8, + }; + break; + case 0x76: + /* + * Intel ISA Reference: + * "Instruction TLB: 2M/4M pages, fully associative, 8 entries" + * Application Note 485: + * "Instruction TLB: 2M/4M pages, fully associative, 8 entries" + */ + *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 8, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0x78: + /* + * Intel ISA Reference: + * "2nd-level cache: 1 MByte, 4-way set associative, 64byte line size" + * Application Note 485: + * "2nd-level cache: 1-MB, 4-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 4, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x79: + /* + * Intel ISA Reference: + * "2nd-level cache: 128 KByte, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "2nd-level cache: 128-KB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 128 * 1024, + .associativity = 8, + .sets = 256, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x7A: + /* + * Intel ISA Reference: + * "2nd-level cache: 256 KByte, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "2nd-level cache: 256-KB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 512, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x7B: + /* + * Intel ISA Reference: + * "2nd-level cache: 512 KByte, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "2nd-level cache: 512-KB, 8-way set associative, 
sectored cache, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 8, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x7C: + /* + * Intel ISA Reference: + * "2nd-level cache: 1 MByte, 8-way set associative, 64 byte line size, 2 lines per sector" + * Application Note 485: + * "2nd-level cache: 1-MB, 8-way set associative, sectored cache, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x7D: + /* + * Intel ISA Reference: + * "2nd-level cache: 2 MByte, 8-way set associative, 64byte line size" + * Application Note 485: + * "2nd-level cache: 2-MB, 8-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 2 * 1024 * 1024, + .associativity = 8, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x7F: + /* + * Intel ISA Reference: + * "2nd-level cache: 512 KByte, 2-way set associative, 64-byte line size" + * Application Note 485: + * "2nd-level cache: 512-KB, 2-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 2, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x80: + /* + * Intel ISA Reference: + * "2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size" + * Application Note 485: + * "2nd-level cache: 512-KB, 8-way set associative, 64-byte line size" + * Cyrix CPU Detection Guide and Geode GX1 Processor Series: + * "Level 1 Cache, 16K, 4-way set associative, 16 Bytes/Line" + */ + switch (vendor) { +#if CPUINFO_ARCH_X86 && !defined(__ANDROID__) + case cpuinfo_vendor_cyrix: + case cpuinfo_vendor_nsc: + cache->l1i = cache->l1d = (struct cpuinfo_x86_cache) { + .size = 16 * 1024, + .associativity = 4, + .sets = 256, + .partitions = 1, + .line_size = 16, + .flags = CPUINFO_CACHE_UNIFIED, + }; + break; +#endif /* CPUINFO_ARCH_X86 */ + default: + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 8, + .sets = 1024, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + } + break; + case 0x82: + /* + * Intel ISA Reference: + * "2nd-level cache: 256 KByte, 8-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 256-KB, 8-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 256 * 1024, + .associativity = 8, + .sets = 1024, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x83: + /* + * Intel ISA Reference: + * "2nd-level cache: 512 KByte, 8-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 512-KB, 8-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x84: + /* + * Intel ISA Reference: + * "2nd-level cache: 1 MByte, 8-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 1-MB, 8-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 4096, +
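+ /* consistency check for these hard-coded descriptor tables: size == associativity * sets * partitions * line_size, e.g. 8 * 4096 * 1 * 32 = 1 MB for this entry */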
.partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x85: + /* + * Intel ISA Reference: + * "2nd-level cache: 2 MByte, 8-way set associative, 32 byte line size" + * Application Note 485: + * "2nd-level cache: 2-MB, 8-way set associative, 32-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 2 * 1024 * 1024, + .associativity = 8, + .sets = 8192, + .partitions = 1, + .line_size = 32, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x86: + /* + * Intel ISA Reference: + * "2nd-level cache: 512 KByte, 4-way set associative, 64 byte line size" + * Application Note 485: + * "2nd-level cache: 512-KB, 4-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 4, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0x87: + /* + * Intel ISA Reference: + * "2nd-level cache: 1 MByte, 8-way set associative, 64 byte line size" + * Application Note 485: + * "2nd-level cache: 1-MB, 8-way set associative, 64-byte line size" + */ + cache->l2 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xA0: + /* + * Intel ISA Reference: + * "DTLB: 4k pages, fully associative, 32 entries" + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 32, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xB0: + /* + * Intel ISA Reference: + * "Instruction TLB: 4 KByte pages, 4-way set associative, 128 entries" + * Application Note 485: + * "Instruction TLB: 4-KB Pages, 4-way set associative, 128 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 128, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xB1: + /* + * Intel ISA Reference: + * "Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries" + * Application Note 485: + * "Instruction TLB: 2-MB pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries" + */ + *itlb_2MB = (struct cpuinfo_tlb) { + .entries = 8, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + *itlb_4MB = (struct cpuinfo_tlb) { + .entries = 4, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0xB2: + /* + * Intel ISA Reference: + * "Instruction TLB: 4KByte pages, 4-way set associative, 64 entries" + * Application Note 485: + * "Instruction TLB: 4-KB pages, 4-way set associative, 64 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 64, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xB3: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte pages, 4-way set associative, 128 entries" + * Application Note 485: + * "Data TLB: 4-KB Pages, 4-way set associative, 128 entries" + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 128, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xB4: + /* + * Intel ISA Reference: + * "Data TLB1: 4 KByte pages, 4-way associative, 256 entries" + * Application Note 485: + * "Data TLB: 4-KB Pages, 4-way set associative, 256 entries" + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 256, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xB5: + /* + * Intel ISA Reference: + * "Instruction TLB: 4KByte pages, 8-way set associative, 64 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { 
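+ /* convention used throughout this table: fully-associative TLBs set .associativity equal to .entries, while set-associative ones (like this 8-way ITLB) store the documented way count */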
+ .entries = 64, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xB6: + /* + * Intel ISA Reference: + * "Instruction TLB: 4KByte pages, 8-way set associative, 128 entries" + */ + *itlb_4KB = (struct cpuinfo_tlb) { + .entries = 128, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xBA: + /* + * Intel ISA Reference: + * "Data TLB1: 4 KByte pages, 4-way associative, 64 entries" + * Application Note 485: + * "Data TLB: 4-KB Pages, 4-way set associative, 64 entries" + */ + *dtlb_4KB = (struct cpuinfo_tlb) { + .entries = 64, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xC0: + /* + * Intel ISA Reference: + * "Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries" + * Application Note 485: + * "Data TLB: 4-KB or 4-MB Pages, 4-way set associative, 8 entries" + */ + *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 8, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0xC1: + /* + * Intel ISA Reference: + * "Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries" + */ + *stlb2_4KB = *stlb2_2MB = (struct cpuinfo_tlb) { + .entries = 1024, + .associativity = 8, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB, + }; + break; + case 0xC2: + /* + * Intel ISA Reference: + * "DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries" + */ + *dtlb_4KB = *dtlb_2MB = (struct cpuinfo_tlb) { + .entries = 16, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB, + }; + break; + case 0xC3: + /* + * Intel ISA Reference: + * "Shared 2nd-Level TLB: 4 KByte/2 MByte pages, 6-way associative, 1536 entries. + * Also 1GBbyte pages, 4-way, 16 entries." + */ + *stlb2_4KB = *stlb2_2MB = (struct cpuinfo_tlb) { + .entries = 1536, + .associativity = 6, + .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB, + }; + *stlb2_1GB = (struct cpuinfo_tlb) { + .entries = 16, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_1GB, + }; + break; + case 0xC4: + /* + * Intel ISA Reference: + * "DTLB: 2M/4M Byte pages, 4-way associative, 32 entries" + */ + *dtlb_2MB = *dtlb_4MB = (struct cpuinfo_tlb) { + .entries = 32, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, + }; + break; + case 0xCA: + /* + * Intel ISA Reference: + * "Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries" + * Application Note 485: + * "Shared 2nd-level TLB: 4 KB pages, 4-way set associative, 512 entries" + */ + *stlb2_4KB = (struct cpuinfo_tlb) { + .entries = 512, + .associativity = 4, + .pages = CPUINFO_PAGE_SIZE_4KB, + }; + break; + case 0xD0: + /* + * Intel ISA Reference: + * "3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 512-kB, 4-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 512 * 1024, + .associativity = 4, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xD1: + /* + * Intel ISA Reference: + * "3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 1-MB, 4-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 4, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xD2: + /* + * Intel ISA Reference: + * "3rd-level cache: 2
MByte, 4-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 2-MB, 4-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 2 * 1024 * 1024, + .associativity = 4, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xD6: + /* + * Intel ISA Reference: + * "3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 1-MB, 8-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 1024 * 1024, + .associativity = 8, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xD7: + /* + * Intel ISA Reference: + * "3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 2-MB, 8-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 2 * 1024 * 1024, + .associativity = 8, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xD8: + /* + * Intel ISA Reference: + * "3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 4-MB, 8-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 4 * 1024 * 1024, + .associativity = 8, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xDC: + /* + * Intel ISA Reference: + * "3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 1.5-MB, 12-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 3 * 512 * 1024, + .associativity = 12, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xDD: + /* + * Intel ISA Reference: + * "3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 3-MB, 12-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 3 * 1024 * 1024, + .associativity = 12, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xDE: + /* + * Intel ISA Reference: + * "3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 6-MB, 12-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 6 * 1024 * 1024, + .associativity = 12, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xE2: + /* + * Intel ISA Reference: + * "3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 2-MB, 16-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 2 * 1024 * 1024, + .associativity = 16, + .sets = 2048, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xE3: + /* + * Intel ISA Reference: + * "3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 4-MB, 16-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 4 * 1024 * 1024, + .associativity
= 16, + .sets = 4096, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xE4: + /* + * Intel ISA Reference: + * "3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 8-MB, 16-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 8 * 1024 * 1024, + .associativity = 16, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xEA: + /* + * Intel ISA Reference: + * "3rd-level cache: 12MByte, 24-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 12-MB, 24-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 12 * 1024 * 1024, + .associativity = 24, + .sets = 8192, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xEB: + /* + * Intel ISA Reference: + * "3rd-level cache: 18MByte, 24-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 18-MB, 24-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 18 * 1024 * 1024, + .associativity = 24, + .sets = 12288, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xEC: + /* + * Intel ISA Reference: + * "3rd-level cache: 24MByte, 24-way set associative, 64 byte line size" + * Application Note 485: + * "3rd-level cache: 24-MB, 24-way set associative, 64-byte line size" + */ + cache->l3 = (struct cpuinfo_x86_cache) { + .size = 24 * 1024 * 1024, + .associativity = 24, + .sets = 16384, + .partitions = 1, + .line_size = 64, + .flags = CPUINFO_CACHE_INCLUSIVE, + }; + break; + case 0xF0: + /* + * Intel ISA Reference: + * "64-Byte prefetching" + * Application Note 485: + * "64-byte Prefetching" + */ + cache->prefetch_size = 64; + break; + case 0xF1: + /* + * Intel ISA Reference: + * "128-Byte prefetching" + * Application Note 485: + * "128-byte Prefetching" + */ + cache->prefetch_size = 128; + break; + } +} diff --git a/dep/cpuinfo/src/x86/cache/deterministic.c b/dep/cpuinfo/src/x86/cache/deterministic.c new file mode 100644 index 000000000..befd5029a --- /dev/null +++ b/dep/cpuinfo/src/x86/cache/deterministic.c @@ -0,0 +1,257 @@ +#include + +#include +#include +#include +#include + + +enum cache_type { + cache_type_none = 0, + cache_type_data = 1, + cache_type_instruction = 2, + cache_type_unified = 3, +}; + +bool cpuinfo_x86_decode_deterministic_cache_parameters( + struct cpuid_regs regs, + struct cpuinfo_x86_caches* cache, + uint32_t* package_cores_max) +{ + const uint32_t type = regs.eax & UINT32_C(0x1F); + if (type == cache_type_none) { + return false; + } + + /* Level starts at 1 */ + const uint32_t level = (regs.eax >> 5) & UINT32_C(0x7); + + const uint32_t sets = 1 + regs.ecx; + const uint32_t line_size = 1 + (regs.ebx & UINT32_C(0x00000FFF)); + const uint32_t partitions = 1 + ((regs.ebx >> 12) & UINT32_C(0x000003FF)); + const uint32_t associativity = 1 + (regs.ebx >> 22); + + *package_cores_max = 1 + (regs.eax >> 26); + const uint32_t processors = 1 + ((regs.eax >> 14) & UINT32_C(0x00000FFF)); + const uint32_t apic_bits = bit_length(processors); + + uint32_t flags = 0; + if (regs.edx & UINT32_C(0x00000002)) { + flags |= CPUINFO_CACHE_INCLUSIVE; + } + if (regs.edx & UINT32_C(0x00000004)) { + flags |= CPUINFO_CACHE_COMPLEX_INDEXING; + } + switch (level) { + case 1: + switch (type) { + case 
cache_type_unified: + cache->l1d = cache->l1i = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags | CPUINFO_CACHE_UNIFIED, + .apic_bits = apic_bits + }; + break; + case cache_type_data: + cache->l1d = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + case cache_type_instruction: + cache->l1i = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + case 2: + switch (type) { + case cache_type_instruction: + cpuinfo_log_warning("unexpected L2 instruction cache reported in leaf 0x00000004 is ignored"); + break; + case cache_type_unified: + flags |= CPUINFO_CACHE_UNIFIED; + case cache_type_data: + cache->l2 = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + case 3: + switch (type) { + case cache_type_instruction: + cpuinfo_log_warning("unexpected L3 instruction cache reported in leaf 0x00000004 is ignored"); + break; + case cache_type_unified: + flags |= CPUINFO_CACHE_UNIFIED; + case cache_type_data: + cache->l3 = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + case 4: + switch (type) { + case cache_type_instruction: + cpuinfo_log_warning("unexpected L4 instruction cache reported in leaf 0x00000004 is ignored"); + break; + case cache_type_unified: + flags |= CPUINFO_CACHE_UNIFIED; + case cache_type_data: + cache->l4 = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + default: + cpuinfo_log_warning("unexpected L%"PRIu32" cache reported in leaf 0x00000004 is ignored", level); + break; + } + return true; +} + + +bool cpuinfo_x86_decode_cache_properties( + struct cpuid_regs regs, + struct cpuinfo_x86_caches* cache) +{ + const uint32_t type = regs.eax & UINT32_C(0x1F); + if (type == cache_type_none) { + return false; + } + + const uint32_t level = (regs.eax >> 5) & UINT32_C(0x7); + const uint32_t cores = 1 + ((regs.eax >> 14) & UINT32_C(0x00000FFF)); + const uint32_t apic_bits = bit_length(cores); + + const uint32_t sets = 1 + regs.ecx; + const uint32_t line_size = 1 + (regs.ebx & UINT32_C(0x00000FFF)); + const uint32_t partitions = 1 + ((regs.ebx >> 12) & UINT32_C(0x000003FF)); + const uint32_t associativity = 1 + (regs.ebx >> 22); + + uint32_t flags = 0; + if (regs.edx & UINT32_C(0x00000002)) { + flags |= CPUINFO_CACHE_INCLUSIVE; + } + + switch (level) { + case 1: + switch (type) { + case cache_type_unified: + cache->l1d = cache->l1i = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * 
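+ /* leaf 0x00000004 encodes ways, partitions, line size and sets as value-minus-one (decoded above); the cache size is their product, e.g. 8 ways * 1 partition * 64-byte lines * 64 sets = 32 KB for a typical L1D */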
sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags | CPUINFO_CACHE_UNIFIED, + .apic_bits = apic_bits + }; + break; + case cache_type_data: + cache->l1d = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + case cache_type_instruction: + cache->l1i = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + case 2: + switch (type) { + case cache_type_instruction: + cpuinfo_log_warning("unexpected L2 instruction cache reported in leaf 0x8000001D is ignored"); + break; + case cache_type_unified: + flags |= CPUINFO_CACHE_UNIFIED; + case cache_type_data: + cache->l2 = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + case 3: + switch (type) { + case cache_type_instruction: + cpuinfo_log_warning("unexpected L3 instruction cache reported in leaf 0x8000001D is ignored"); + break; + case cache_type_unified: + flags |= CPUINFO_CACHE_UNIFIED; + case cache_type_data: + cache->l3 = (struct cpuinfo_x86_cache) { + .size = associativity * partitions * line_size * sets, + .associativity = associativity, + .sets = sets, + .partitions = partitions, + .line_size = line_size, + .flags = flags, + .apic_bits = apic_bits + }; + break; + } + break; + default: + cpuinfo_log_warning("unexpected L%"PRIu32" cache reported in leaf 0x8000001D is ignored", level); + break; + } + return true; +} diff --git a/dep/cpuinfo/src/x86/cache/init.c b/dep/cpuinfo/src/x86/cache/init.c new file mode 100644 index 000000000..dd1f1ea37 --- /dev/null +++ b/dep/cpuinfo/src/x86/cache/init.c @@ -0,0 +1,88 @@ +#include + +#include +#include +#include +#include +#include + + +union cpuinfo_x86_cache_descriptors { + struct cpuid_regs regs; + uint8_t as_bytes[16]; +}; + +enum cache_type { + cache_type_none = 0, + cache_type_data = 1, + cache_type_instruction = 2, + cache_type_unified = 3, +}; + +void cpuinfo_x86_detect_cache( + uint32_t max_base_index, uint32_t max_extended_index, + bool amd_topology_extensions, + enum cpuinfo_vendor vendor, + const struct cpuinfo_x86_model_info* model_info, + struct cpuinfo_x86_caches* cache, + struct cpuinfo_tlb* itlb_4KB, + struct cpuinfo_tlb* itlb_2MB, + struct cpuinfo_tlb* itlb_4MB, + struct cpuinfo_tlb* dtlb0_4KB, + struct cpuinfo_tlb* dtlb0_2MB, + struct cpuinfo_tlb* dtlb0_4MB, + struct cpuinfo_tlb* dtlb_4KB, + struct cpuinfo_tlb* dtlb_2MB, + struct cpuinfo_tlb* dtlb_4MB, + struct cpuinfo_tlb* dtlb_1GB, + struct cpuinfo_tlb* stlb2_4KB, + struct cpuinfo_tlb* stlb2_2MB, + struct cpuinfo_tlb* stlb2_1GB, + uint32_t* log2_package_cores_max) +{ + if (max_base_index >= 2) { + union cpuinfo_x86_cache_descriptors descriptors; + descriptors.regs = cpuid(2); + uint32_t iterations = (uint8_t) descriptors.as_bytes[0]; + if (iterations != 0) { +iterate_descriptors: + for (uint32_t i = 1 /* note: not 0 */; i < 16; i++) { + const uint8_t descriptor = descriptors.as_bytes[i]; + if (descriptor != 0) { + cpuinfo_x86_decode_cache_descriptor( + 
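+ /* every non-zero byte of EAX/EBX/ECX/EDX, except byte 0 of EAX which holds the leaf-2 repeat count consumed above, is an independent one-byte descriptor */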
descriptor, vendor, model_info, + cache, + itlb_4KB, itlb_2MB, itlb_4MB, + dtlb0_4KB, dtlb0_2MB, dtlb0_4MB, + dtlb_4KB, dtlb_2MB, dtlb_4MB, dtlb_1GB, + stlb2_4KB, stlb2_2MB, stlb2_1GB, + &cache->prefetch_size); + } + } + if (--iterations != 0) { + descriptors.regs = cpuid(2); + goto iterate_descriptors; + } + } + + if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) { + struct cpuid_regs leaf4; + uint32_t input_ecx = 0; + uint32_t package_cores_max = 0; + do { + leaf4 = cpuidex(4, input_ecx++); + } while (cpuinfo_x86_decode_deterministic_cache_parameters( + leaf4, cache, &package_cores_max)); + if (package_cores_max != 0) { + *log2_package_cores_max = bit_length(package_cores_max); + } + } + } + if (amd_topology_extensions && max_extended_index >= UINT32_C(0x8000001D)) { + struct cpuid_regs leaf0x8000001D; + uint32_t input_ecx = 0; + do { + leaf0x8000001D = cpuidex(UINT32_C(0x8000001D), input_ecx++); + } while (cpuinfo_x86_decode_cache_properties(leaf0x8000001D, cache)); + } +} diff --git a/dep/cpuinfo/src/x86/cpuid.h b/dep/cpuinfo/src/x86/cpuid.h new file mode 100644 index 000000000..9e9e01318 --- /dev/null +++ b/dep/cpuinfo/src/x86/cpuid.h @@ -0,0 +1,79 @@ +#pragma once +#include + +#if defined(__GNUC__) + #include +#elif defined(_MSC_VER) + #include +#endif + +#if CPUINFO_MOCK + #include +#endif +#include + + +#if defined(__GNUC__) || defined(_MSC_VER) + static inline struct cpuid_regs cpuid(uint32_t eax) { + #if CPUINFO_MOCK + uint32_t regs_array[4]; + cpuinfo_mock_get_cpuid(eax, regs_array); + return (struct cpuid_regs) { + .eax = regs_array[0], + .ebx = regs_array[1], + .ecx = regs_array[2], + .edx = regs_array[3], + }; + #else + struct cpuid_regs regs; + #if defined(__GNUC__) + __cpuid(eax, regs.eax, regs.ebx, regs.ecx, regs.edx); + #else + int regs_array[4]; + __cpuid(regs_array, (int) eax); + regs.eax = regs_array[0]; + regs.ebx = regs_array[1]; + regs.ecx = regs_array[2]; + regs.edx = regs_array[3]; + #endif + return regs; + #endif + } + + static inline struct cpuid_regs cpuidex(uint32_t eax, uint32_t ecx) { + #if CPUINFO_MOCK + uint32_t regs_array[4]; + cpuinfo_mock_get_cpuidex(eax, ecx, regs_array); + return (struct cpuid_regs) { + .eax = regs_array[0], + .ebx = regs_array[1], + .ecx = regs_array[2], + .edx = regs_array[3], + }; + #else + struct cpuid_regs regs; + #if defined(__GNUC__) + __cpuid_count(eax, ecx, regs.eax, regs.ebx, regs.ecx, regs.edx); + #else + int regs_array[4]; + __cpuidex(regs_array, (int) eax, (int) ecx); + regs.eax = regs_array[0]; + regs.ebx = regs_array[1]; + regs.ecx = regs_array[2]; + regs.edx = regs_array[3]; + #endif + return regs; + #endif + } +#endif + +static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) { + #ifdef _MSC_VER + return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg); + #else + uint32_t lo, hi; + __asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg)); + return ((uint64_t) hi << 32) | (uint64_t) lo; + #endif +} + diff --git a/dep/cpuinfo/src/x86/info.c b/dep/cpuinfo/src/x86/info.c new file mode 100644 index 000000000..ceb6b8457 --- /dev/null +++ b/dep/cpuinfo/src/x86/info.c @@ -0,0 +1,19 @@ +#include + +#include +#include + + +struct cpuinfo_x86_model_info cpuinfo_x86_decode_model_info(uint32_t eax) { + struct cpuinfo_x86_model_info model_info; + model_info.stepping = eax & 0xF; + model_info.base_model = (eax >> 4) & 0xF; + model_info.base_family = (eax >> 8) & 0xF; + model_info.processor_type = (eax >> 12) & 0x3; + model_info.extended_model = (eax >> 16) & 0xF; + 
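+ /* worked example (hypothetical leaf-1 EAX value 0x000906EA): stepping = 0xA, base_model = 0xE, base_family = 0x6, extended_model = 0x9, extended_family = 0x0, giving family = 0x06 and model = 0x9E below */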
model_info.extended_family = (eax >> 20) & 0xFF; + + model_info.family = model_info.base_family + model_info.extended_family; + model_info.model = model_info.base_model + (model_info.extended_model << 4); + return model_info; +} diff --git a/dep/cpuinfo/src/x86/init.c b/dep/cpuinfo/src/x86/init.c new file mode 100644 index 000000000..244359cdd --- /dev/null +++ b/dep/cpuinfo/src/x86/init.c @@ -0,0 +1,75 @@ +#include +#include + +#include +#include +#include +#include +#include +#include + + +struct cpuinfo_x86_isa cpuinfo_isa = { 0 }; +CPUINFO_INTERNAL uint32_t cpuinfo_x86_clflush_size = 0; + +void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) { + const struct cpuid_regs leaf0 = cpuid(0); + const uint32_t max_base_index = leaf0.eax; + const enum cpuinfo_vendor vendor = processor->vendor = + cpuinfo_x86_decode_vendor(leaf0.ebx, leaf0.ecx, leaf0.edx); + + const struct cpuid_regs leaf0x80000000 = cpuid(UINT32_C(0x80000000)); + const uint32_t max_extended_index = + leaf0x80000000.eax >= UINT32_C(0x80000000) ? leaf0x80000000.eax : 0; + + const struct cpuid_regs leaf0x80000001 = max_extended_index >= UINT32_C(0x80000001) ? + cpuid(UINT32_C(0x80000001)) : (struct cpuid_regs) { 0, 0, 0, 0 }; + + if (max_base_index >= 1) { + const struct cpuid_regs leaf1 = cpuid(1); + processor->cpuid = leaf1.eax; + + const struct cpuinfo_x86_model_info model_info = cpuinfo_x86_decode_model_info(leaf1.eax); + const enum cpuinfo_uarch uarch = processor->uarch = + cpuinfo_x86_decode_uarch(vendor, &model_info); + + cpuinfo_x86_clflush_size = ((leaf1.ebx >> 8) & UINT32_C(0x000000FF)) * 8; + + /* + * Topology extensions support: + * - AMD: ecx[bit 22] in extended info (reserved bit on Intel CPUs). + */ + const bool amd_topology_extensions = !!(leaf0x80000001.ecx & UINT32_C(0x00400000)); + + cpuinfo_x86_detect_cache( + max_base_index, max_extended_index, amd_topology_extensions, vendor, &model_info, + &processor->cache, + &processor->tlb.itlb_4KB, + &processor->tlb.itlb_2MB, + &processor->tlb.itlb_4MB, + &processor->tlb.dtlb0_4KB, + &processor->tlb.dtlb0_2MB, + &processor->tlb.dtlb0_4MB, + &processor->tlb.dtlb_4KB, + &processor->tlb.dtlb_2MB, + &processor->tlb.dtlb_4MB, + &processor->tlb.dtlb_1GB, + &processor->tlb.stlb2_4KB, + &processor->tlb.stlb2_2MB, + &processor->tlb.stlb2_1GB, + &processor->topology.core_bits_length); + + cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology); + + cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001, + max_base_index, max_extended_index, vendor, uarch); + } + if (max_extended_index >= UINT32_C(0x80000004)) { + struct cpuid_regs brand_string[3]; + for (uint32_t i = 0; i < 3; i++) { + brand_string[i] = cpuid(UINT32_C(0x80000002) + i); + } + memcpy(processor->brand_string, brand_string, sizeof(processor->brand_string)); + cpuinfo_log_debug("raw CPUID brand string: \"%48s\"", processor->brand_string); + } +} diff --git a/dep/cpuinfo/src/x86/isa.c b/dep/cpuinfo/src/x86/isa.c new file mode 100644 index 000000000..f2e5a281b --- /dev/null +++ b/dep/cpuinfo/src/x86/isa.c @@ -0,0 +1,724 @@ +#include +#include +#include + +#include +#include + + +#if CPUINFO_ARCH_X86 + #ifdef _MSC_VER + #pragma pack(push, 2) + #endif + struct fxsave_region { + uint16_t fpu_control_word; + uint16_t fpu_status_word; + uint16_t fpu_tag_word; + uint16_t fpu_opcode; + uint32_t fpu_instruction_pointer_offset; + uint32_t fpu_instruction_pointer_selector; + uint32_t fpu_operand_pointer_offset; + uint32_t fpu_operand_pointer_selector; + uint32_t 
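+ /* MXCSR value and its writable-bits mask as stored by FXSAVE; bit 6 of mxcsr_mask is tested below to detect DAZ support */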
mxcsr_state; + uint32_t mxcsr_mask; + uint64_t fpu_registers[8 * 2]; + uint64_t xmm_registers[8 * 2]; + uint64_t padding[28]; + } + #ifndef _MSC_VER + __attribute__((__aligned__(16), __packed__)) + #endif + ; /* end of fxsave_region structure */ + #ifdef _MSC_VER + #pragma pack(pop, 2) + #endif +#endif + + +struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( + const struct cpuid_regs basic_info, const struct cpuid_regs extended_info, + uint32_t max_base_index, uint32_t max_extended_index, + enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch) +{ + struct cpuinfo_x86_isa isa = { 0 }; + + const struct cpuid_regs structured_feature_info0 = + (max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0}; + const struct cpuid_regs structured_feature_info1 = + (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0}; + + const uint32_t processor_capacity_info_index = UINT32_C(0x80000008); + const struct cpuid_regs processor_capacity_info = + (max_extended_index >= processor_capacity_info_index) ? + cpuid(processor_capacity_info_index) : (struct cpuid_regs) { 0, 0, 0, 0 }; + + bool avx_regs = false, avx512_regs = false, mpx_regs = false; + /* + * OSXSAVE: Operating system enabled XSAVE instructions for application use: + * - Intel, AMD: ecx[bit 26] in basic info = XSAVE/XRSTOR instructions supported by a chip. + * - Intel, AMD: ecx[bit 27] in basic info = XSAVE/XRSTOR instructions enabled by OS. + */ + const uint32_t osxsave_mask = UINT32_C(0x0C000000); + if ((basic_info.ecx & osxsave_mask) == osxsave_mask) { + uint64_t xcr0_valid_bits = 0; + if (max_base_index >= 0xD) { + const struct cpuid_regs regs = cpuidex(0xD, 0); + xcr0_valid_bits = ((uint64_t) regs.edx << 32) | regs.eax; + } + + const uint64_t xfeature_enabled_mask = xgetbv(0); + + /* + * AVX registers: + * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 1] for low 128 bits of ymm registers + * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 2] for high 128 bits of ymm registers + */ + const uint64_t avx_regs_mask = UINT64_C(0x0000000000000006); + if ((xcr0_valid_bits & avx_regs_mask) == avx_regs_mask) { + avx_regs = (xfeature_enabled_mask & avx_regs_mask) == avx_regs_mask; + } + + /* + * AVX512 registers: + * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 1] for low 128 bits of zmm registers + * - Intel, AMD: XFEATURE_ENABLED_MASK[bit 2] for bits 128-255 of zmm registers + * - Intel: XFEATURE_ENABLED_MASK[bit 5] for 8 64-bit OpMask registers (k0-k7) + * - Intel: XFEATURE_ENABLED_MASK[bit 6] for the high 256 bits of the zmm registers zmm0-zmm15 + * - Intel: XFEATURE_ENABLED_MASK[bit 7] for the 512-bit zmm registers zmm16-zmm31 + */ + const uint64_t avx512_regs_mask = UINT64_C(0x00000000000000E6); + if ((xcr0_valid_bits & avx512_regs_mask) == avx512_regs_mask) { + avx512_regs = (xfeature_enabled_mask & avx512_regs_mask) == avx512_regs_mask; + } + + /* + * MPX registers: + * - Intel: XFEATURE_ENABLED_MASK[bit 3] for BNDREGS + * - Intel: XFEATURE_ENABLED_MASK[bit 4] for BNDCSR + */ + const uint64_t mpx_regs_mask = UINT64_C(0x0000000000000018); + if ((xcr0_valid_bits & mpx_regs_mask) == mpx_regs_mask) { + mpx_regs = (xfeature_enabled_mask & mpx_regs_mask) == mpx_regs_mask; + } + } + +#if CPUINFO_ARCH_X86 + /* + * RDTSC instruction: + * - Intel, AMD: edx[bit 4] in basic info. + * - AMD: edx[bit 4] in extended info (reserved bit on Intel CPUs). + */ + isa.rdtsc = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000010)); +#endif + + /* + * SYSENTER/SYSEXIT instructions: + * - Intel, AMD: edx[bit 11] in basic info. 
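+ * (bit 11 corresponds to the UINT32_C(0x00000800) mask below; the !!(reg & mask) idiom used throughout this function reduces the masked bit to 0 or 1)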
+ */ + isa.sysenter = !!(basic_info.edx & UINT32_C(0x00000800)); + +#if CPUINFO_ARCH_X86 + /* + * SYSCALL/SYSRET instructions: + * - Intel, AMD: edx[bit 11] in extended info. + */ + isa.syscall = !!(extended_info.edx & UINT32_C(0x00000800)); +#endif + + /* + * RDMSR/WRMSR instructions: + * - Intel, AMD: edx[bit 5] in basic info. + * - AMD: edx[bit 5] in extended info (reserved bit on Intel CPUs). + */ + isa.msr = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000020)); + + /* + * CLZERO instruction: + * - AMD: ebx[bit 0] in processor capacity info (reserved bit on Intel CPUs). + */ + isa.clzero = !!(processor_capacity_info.ebx & UINT32_C(0x00000001)); + + /* + * CLFLUSH instruction: + * - Intel, AMD: edx[bit 19] in basic info. + */ + isa.clflush = !!(basic_info.edx & UINT32_C(0x00080000)); + + /* + * CLFLUSHOPT instruction: + * - Intel: ebx[bit 23] in structured feature info (ecx = 0). + */ + isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000)); + + /* + * MWAIT/MONITOR instructions: + * - Intel, AMD: ecx[bit 3] in basic info. + */ + isa.mwait = !!(basic_info.ecx & UINT32_C(0x00000008)); + + /* + * MWAITX/MONITORX instructions: + * - AMD: ecx[bit 29] in extended info. + */ + isa.mwaitx = !!(extended_info.ecx & UINT32_C(0x20000000)); + + /* + * FXSAVE/FXRSTOR instructions: + * - Intel, AMD: edx[bit 24] in basic info. + * - AMD: edx[bit 24] in extended info (zero bit on Intel CPUs, EMMX bit on Cyrix CPUs). + */ + switch (vendor) { +#if CPUINFO_ARCH_X86 + case cpuinfo_vendor_cyrix: + case cpuinfo_vendor_nsc: + isa.emmx = !!(extended_info.edx & UINT32_C(0x01000000)); + break; +#endif + default: + isa.fxsave = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x01000000)); + break; + } + + /* + * XSAVE/XRSTOR instructions: + * - Intel, AMD: ecx[bit 26] in basic info. + */ + isa.xsave = !!(basic_info.ecx & UINT32_C(0x04000000)); + +#if CPUINFO_ARCH_X86 + /* + * x87 FPU instructions: + * - Intel, AMD: edx[bit 0] in basic info. + * - AMD: edx[bit 0] in extended info (reserved bit on Intel CPUs). + */ + isa.fpu = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000001)); + + /* + * MMX instructions: + * - Intel, AMD: edx[bit 23] in basic info. + * - AMD: edx[bit 23] in extended info (zero bit on Intel CPUs). + */ + isa.mmx = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00800000)); + + /* + * MMX+/Integer SSE instructions: + * - Intel, AMD: edx[bit 25] in basic info (SSE feature flag). + * - Pre-SSE AMD: edx[bit 22] in extended info (zero bit on Intel CPUs). + */ + isa.mmx_plus = !!((basic_info.edx & UINT32_C(0x02000000)) | (extended_info.edx & UINT32_C(0x00400000))); +#endif + + /* + * 3dnow! instructions: + * - AMD: edx[bit 31] of extended info (zero bit on Intel CPUs). + */ + isa.three_d_now = !!(extended_info.edx & UINT32_C(0x80000000)); + + /* + * 3dnow!+ instructions: + * - AMD: edx[bit 30] of extended info (zero bit on Intel CPUs). + */ + isa.three_d_now_plus = !!(extended_info.edx & UINT32_C(0x40000000)); + +#if CPUINFO_ARCH_X86 + /* + * 3dnow! Geode instructions: + * - No CPUID bit, detect as Geode microarchitecture + 3dnow!+ support + */ + isa.three_d_now_geode = isa.three_d_now_plus && (uarch == cpuinfo_uarch_geode); +#endif + + /* + * PREFETCH instruction: + * - AMD: ecx[bit 8] of extended info (one of 3dnow! prefetch instructions). + * On Intel this bit indicates PREFETCHW, but not PREFETCH support. + * - AMD: edx[bit 31] of extended info (implied by 3dnow! support). Reserved bit on Intel CPUs. 
+ * - AMD: edx[bit 30] of extended info (implied by 3dnow!+ support). Reserved bit on Intel CPUs. + * - AMD: edx[bit 29] of extended info (x86-64 support). Does not imply PREFETCH support on non-AMD CPUs!!! + */ + switch (vendor) { + case cpuinfo_vendor_intel: + /* + * Instruction is not documented in the manual, + * and the 3dnow! prefetch CPUID bit indicates PREFETCHW instruction. + */ + break; + case cpuinfo_vendor_amd: + case cpuinfo_vendor_hygon: + isa.prefetch = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); + break; + default: + /* + * Conservatively assume, that 3dnow!/3dnow!+ support implies PREFETCH support, but + * 3dnow! prefetch CPUID bit follows Intel spec (PREFETCHW, but not PREFETCH). + */ + isa.prefetch = !!(extended_info.edx & UINT32_C(0xC0000000)); + break; + } + + /* + * PREFETCHW instruction: + * - AMD: ecx[bit 8] of extended info (one of 3dnow! prefetch instructions). + * - Intel: ecx[bit 8] of extended info (PREFETCHW instruction only). + * - AMD: edx[bit 31] of extended info (implied by 3dnow! support). Reserved bit on Intel CPUs. + * - AMD: edx[bit 30] of extended info (implied by 3dnow!+ support). Reserved bit on Intel CPUs. + * - AMD: edx[bit 29] of extended info (x86-64 support). Does not imply PREFETCHW support on non-AMD CPUs!!! + */ + switch (vendor) { + case cpuinfo_vendor_amd: + case cpuinfo_vendor_hygon: + isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000))); + break; + default: + /* Assume, that 3dnow!/3dnow!+ support implies PREFETCHW support, not implications from x86-64 support */ + isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xC0000000))); + break; + } + + /* + * PREFETCHWT1 instruction: + * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD. + */ + isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001)); + +#if CPUINFO_ARCH_X86 + /* + * SSE instructions: + * - Intel, AMD: edx[bit 25] in basic info. + */ + isa.sse = !!(basic_info.edx & UINT32_C(0x02000000)); + + /* + * SSE2 instructions: + * - Intel, AMD: edx[bit 26] in basic info. + */ + isa.sse2 = !!(basic_info.edx & UINT32_C(0x04000000)); +#endif + + /* + * SSE3 instructions: + * - Intel, AMD: ecx[bit 0] in basic info. + */ + isa.sse3 = !!(basic_info.ecx & UINT32_C(0x00000001)); + +#if CPUINFO_ARCH_X86 + /* + * CPUs with x86-64 or SSE3 always support DAZ (denormals-as-zero) mode. + * Only early Pentium 4 models may not support it. + */ + if (isa.sse3) { + isa.daz = true; + } else { + /* Detect DAZ support from masked MXCSR bits */ + if (isa.sse && isa.fxsave) { + struct fxsave_region region = { 0 }; + #ifdef _MSC_VER + _fxsave(®ion); + #else + __asm__ __volatile__ ("fxsave %[region];" : [region] "+m" (region)); + #endif + + /* + * Denormals-as-zero (DAZ) flag: + * - Intel, AMD: MXCSR[bit 6] + */ + isa.daz = !!(region.mxcsr_mask & UINT32_C(0x00000040)); + } + } +#endif + + /* + * SSSE3 instructions: + * - Intel, AMD: ecx[bit 9] in basic info. + */ + isa.ssse3 = !!(basic_info.ecx & UINT32_C(0x0000200)); + + + /* + * SSE4.1 instructions: + * - Intel, AMD: ecx[bit 19] in basic info. + */ + isa.sse4_1 = !!(basic_info.ecx & UINT32_C(0x00080000)); + + /* + * SSE4.2 instructions: + * - Intel: ecx[bit 20] in basic info (reserved bit on AMD CPUs). + */ + isa.sse4_2 = !!(basic_info.ecx & UINT32_C(0x00100000)); + + /* + * SSE4A instructions: + * - AMD: ecx[bit 6] in extended info (reserved bit on Intel CPUs). 
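+ * ("extended info" in this file is CPUID leaf 0x80000001, read once in cpuinfo_x86_init_processor() and passed in as extended_info)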
+ */ + isa.sse4a = !!(extended_info.ecx & UINT32_C(0x00000040)); + + /* + * Misaligned memory operands in SSE instructions: + * - AMD: ecx[bit 7] in extended info (reserved bit on Intel CPUs). + */ + isa.misaligned_sse = !!(extended_info.ecx & UINT32_C(0x00000080)); + + /* + * AVX instructions: + * - Intel, AMD: ecx[bit 28] in basic info. + */ + isa.avx = avx_regs && !!(basic_info.ecx & UINT32_C(0x10000000)); + + /* + * FMA3 instructions: + * - Intel: ecx[bit 12] in basic info (reserved bit on AMD CPUs). + */ + isa.fma3 = avx_regs && !!(basic_info.ecx & UINT32_C(0x00001000)); + + /* + * FMA4 instructions: + * - AMD: ecx[bit 16] in extended info (reserved bit on Intel CPUs). + */ + isa.fma4 = avx_regs && !!(extended_info.ecx & UINT32_C(0x00010000)); + + /* + * XOP instructions: + * - AMD: ecx[bit 11] in extended info (reserved bit on Intel CPUs). + */ + isa.xop = avx_regs && !!(extended_info.ecx & UINT32_C(0x00000800)); + + /* + * F16C instructions: + * - Intel, AMD: ecx[bit 29] in basic info. + */ + isa.f16c = avx_regs && !!(basic_info.ecx & UINT32_C(0x20000000)); + + /* + * AVX2 instructions: + * - Intel: ebx[bit 5] in structured feature info (ecx = 0). + */ + isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020)); + + /* + * AVX512F instructions: + * - Intel: ebx[bit 16] in structured feature info (ecx = 0). + */ + isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000)); + + /* + * AVX512PF instructions: + * - Intel: ebx[bit 26] in structured feature info (ecx = 0). + */ + isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000)); + + /* + * AVX512ER instructions: + * - Intel: ebx[bit 27] in structured feature info (ecx = 0). + */ + isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000)); + + /* + * AVX512CD instructions: + * - Intel: ebx[bit 28] in structured feature info (ecx = 0). + */ + isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000)); + + /* + * AVX512DQ instructions: + * - Intel: ebx[bit 17] in structured feature info (ecx = 0). + */ + isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000)); + + /* + * AVX512BW instructions: + * - Intel: ebx[bit 30] in structured feature info (ecx = 0). + */ + isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000)); + + /* + * AVX512VL instructions: + * - Intel: ebx[bit 31] in structured feature info (ecx = 0). + */ + isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000)); + + /* + * AVX512IFMA instructions: + * - Intel: ebx[bit 21] in structured feature info (ecx = 0). + */ + isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000)); + + /* + * AVX512VBMI instructions: + * - Intel: ecx[bit 1] in structured feature info (ecx = 0). + */ + isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002)); + + /* + * AVX512VBMI2 instructions: + * - Intel: ecx[bit 6] in structured feature info (ecx = 0). + */ + isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040)); + + /* + * AVX512BITALG instructions: + * - Intel: ecx[bit 12] in structured feature info (ecx = 0). + */ + isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000)); + + /* + * AVX512VPOPCNTDQ instructions: + * - Intel: ecx[bit 14] in structured feature info (ecx = 0). 
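+ * (as with the other AVX512 flags, the result is additionally gated on avx512_regs, i.e. the OS must have enabled the opmask/ZMM state in XCR0, probed via xgetbv earlier in this function)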
+ */ + isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000)); + + /* + * AVX512VNNI instructions: + * - Intel: ecx[bit 11] in structured feature info (ecx = 0). + */ + isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800)); + + /* + * AVX512_4VNNIW instructions: + * - Intel: edx[bit 2] in structured feature info (ecx = 0). + */ + isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004)); + + /* + * AVX512_4FMAPS instructions: + * - Intel: edx[bit 3] in structured feature info (ecx = 0). + */ + isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008)); + + /* + * AVX512_VP2INTERSECT instructions: + * - Intel: edx[bit 8] in structured feature info (ecx = 0). + */ + isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100)); + + /* + * AVX512_BF16 instructions: + * - Intel: eax[bit 5] in structured feature info (ecx = 1). + */ + isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020)); + + /* + * HLE instructions: + * - Intel: ebx[bit 4] in structured feature info (ecx = 0). + */ + isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010)); + + /* + * RTM instructions: + * - Intel: ebx[bit 11] in structured feature info (ecx = 0). + */ + isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800)); + + /* + * XTEST instruction: + * - Intel: either HLE or RTM is supported + */ + isa.xtest = isa.hle || isa.rtm; + + /* + * MPX registers and instructions: + * - Intel: ebx[bit 14] in structured feature info (ecx = 0). + */ + isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000)); + +#if CPUINFO_ARCH_X86 + /* + * CMOV instructions: + * - Intel, AMD: edx[bit 15] in basic info. + * - AMD: edx[bit 15] in extended info (zero bit on Intel CPUs). + */ + isa.cmov = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00008000)); + + /* + * CMPXCHG8B instruction: + * - Intel, AMD: edx[bit 8] in basic info. + * - AMD: edx[bit 8] in extended info (reserved bit on Intel CPUs). + */ + isa.cmpxchg8b = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000100)); +#endif + + /* + * CMPXCHG16B instruction: + * - Intel, AMD: ecx[bit 13] in basic info. + */ + isa.cmpxchg16b = !!(basic_info.ecx & UINT32_C(0x00002000)); + + /* + * CLWB instruction: + * - Intel: ebx[bit 24] in structured feature info (ecx = 0). + */ + isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000)); + + /* + * MOVBE instruction: + * - Intel: ecx[bit 22] in basic info. + */ + isa.movbe = !!(basic_info.ecx & UINT32_C(0x00400000)); + +#if CPUINFO_ARCH_X86_64 + /* + * Some early x86-64 CPUs lack LAHF & SAHF instructions. + * A special CPU feature bit must be checked to ensure their availability: + * - Intel, AMD: ecx[bit 0] in extended info. + */ + isa.lahf_sahf = !!(extended_info.ecx & UINT32_C(0x00000001)); +#endif + + /* + * RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions. + * - Intel: ebx[bit 0] in structured feature info (ecx = 0). + */ + isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001)); + + /* + * LZCNT instruction: + * - Intel, AMD: ecx[bit 5] in extended info. + */ + isa.lzcnt = !!(extended_info.ecx & UINT32_C(0x00000020)); + + /* + * POPCNT instruction: + * - Intel, AMD: ecx[bit 23] in basic info. + */ + isa.popcnt = !!(basic_info.ecx & UINT32_C(0x00800000)); + + /* + * TBM instructions: + * - AMD: ecx[bit 21] in extended info (reserved bit on Intel CPUs). 
+ */ + isa.tbm = !!(extended_info.ecx & UINT32_C(0x00200000)); + + /* + * BMI instructions: + * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0). + */ + isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008)); + + /* + * BMI2 instructions: + * - Intel: ebx[bit 8] in structured feature info (ecx = 0). + */ + isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100)); + + /* + * ADCX/ADOX instructions: + * - Intel: ebx[bit 19] in structured feature info (ecx = 0). + */ + isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000)); + + /* + * AES instructions: + * - Intel: ecx[bit 25] in basic info (reserved bit on AMD CPUs). + */ + isa.aes = !!(basic_info.ecx & UINT32_C(0x02000000)); + + /* + * VAES instructions: + * - Intel: ecx[bit 9] in structured feature info (ecx = 0). + */ + isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200)); + + /* + * PCLMULQDQ instruction: + * - Intel: ecx[bit 1] in basic info (reserved bit on AMD CPUs). + */ + isa.pclmulqdq = !!(basic_info.ecx & UINT32_C(0x00000002)); + + /* + * VPCLMULQDQ instruction: + * - Intel: ecx[bit 10] in structured feature info (ecx = 0). + */ + isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400)); + + /* + * GFNI instructions: + * - Intel: ecx[bit 8] in structured feature info (ecx = 0). + */ + isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100)); + + /* + * RDRAND instruction: + * - Intel: ecx[bit 30] in basic info (reserved bit on AMD CPUs). + */ + isa.rdrand = !!(basic_info.ecx & UINT32_C(0x40000000)); + + /* + * RDSEED instruction: + * - Intel: ebx[bit 18] in structured feature info (ecx = 0). + */ + isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000)); + + /* + * SHA instructions: + * - Intel: ebx[bit 29] in structured feature info (ecx = 0). + */ + isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000)); + + if (vendor == cpuinfo_vendor_via) { + const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000)); + const uint32_t max_padlock_index = padlock_meta_info.eax; + const uint32_t padlock_info_index = UINT32_C(0xC0000001); + if (max_padlock_index >= padlock_info_index) { + const struct cpuid_regs padlock_info = cpuid(padlock_info_index); + + /* + * Padlock RNG extension: + * - VIA: edx[bit 2] in padlock info = RNG exists on chip flag. + * - VIA: edx[bit 3] in padlock info = RNG enabled by OS. + */ + const uint32_t padlock_rng_mask = UINT32_C(0x0000000C); + isa.rng = (padlock_info.edx & padlock_rng_mask) == padlock_rng_mask; + + /* + * Padlock ACE extension: + * - VIA: edx[bit 6] in padlock info = ACE exists on chip flag. + * - VIA: edx[bit 7] in padlock info = ACE enabled by OS. + */ + const uint32_t padlock_ace_mask = UINT32_C(0x000000C0); + isa.ace = (padlock_info.edx & padlock_ace_mask) == padlock_ace_mask; + + /* + * Padlock ACE 2 extension: + * - VIA: edx[bit 8] in padlock info = ACE2 exists on chip flag. + * - VIA: edx[bit 9] in padlock info = ACE 2 enabled by OS. + */ + const uint32_t padlock_ace2_mask = UINT32_C(0x00000300); + isa.ace2 = (padlock_info.edx & padlock_ace2_mask) == padlock_ace2_mask; + + /* + * Padlock PHE extension: + * - VIA: edx[bit 10] in padlock info = PHE exists on chip flag. + * - VIA: edx[bit 11] in padlock info = PHE enabled by OS. + */ + const uint32_t padlock_phe_mask = UINT32_C(0x00000C00); + isa.phe = (padlock_info.edx & padlock_phe_mask) == padlock_phe_mask; + + /* + * Padlock PMM extension: + * - VIA: edx[bit 12] in padlock info = PMM exists on chip flag. 
+ * - VIA: edx[bit 13] in padlock info = PMM enabled by OS. + */ + const uint32_t padlock_pmm_mask = UINT32_C(0x00003000); + isa.pmm = (padlock_info.edx & padlock_pmm_mask) == padlock_pmm_mask; + } + } + + /* + * LWP instructions: + * - AMD: ecx[bit 15] in extended info (reserved bit on Intel CPUs). + */ + isa.lwp = !!(extended_info.ecx & UINT32_C(0x00008000)); + + /* + * RDTSCP instruction: + * - Intel, AMD: edx[bit 27] in extended info. + */ + isa.rdtscp = !!(extended_info.edx & UINT32_C(0x08000000)); + + /* + * RDPID instruction: + * - Intel: ecx[bit 22] in structured feature info (ecx = 0). + */ + isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000)); + + return isa; +} diff --git a/dep/cpuinfo/src/x86/linux/api.h b/dep/cpuinfo/src/x86/linux/api.h new file mode 100644 index 000000000..1c9485b19 --- /dev/null +++ b/dep/cpuinfo/src/x86/linux/api.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include + + +struct cpuinfo_x86_linux_processor { + uint32_t apic_id; + uint32_t linux_id; + uint32_t flags; +}; + +CPUINFO_INTERNAL bool cpuinfo_x86_linux_parse_proc_cpuinfo( + uint32_t max_processors_count, + struct cpuinfo_x86_linux_processor processors[restrict static max_processors_count]); diff --git a/dep/cpuinfo/src/x86/linux/cpuinfo.c b/dep/cpuinfo/src/x86/linux/cpuinfo.c new file mode 100644 index 000000000..90ff81435 --- /dev/null +++ b/dep/cpuinfo/src/x86/linux/cpuinfo.c @@ -0,0 +1,207 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Size, in chars, of the on-stack buffer used for parsing lines of /proc/cpuinfo. + * This is also the limit on the length of a single line. + */ +#define BUFFER_SIZE 2048 + + +static uint32_t parse_processor_number( + const char* processor_start, + const char* processor_end) +{ + const size_t processor_length = (size_t) (processor_end - processor_start); + + if (processor_length == 0) { + cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty"); + return 0; + } + + uint32_t processor_number = 0; + for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) { + const uint32_t digit = (uint32_t) (*digit_ptr - '0'); + if (digit > 10) { + cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored", + (int) (processor_end - digit_ptr), digit_ptr); + break; + } + + processor_number = processor_number * 10 + digit; + } + + return processor_number; +} + +/* + * Decode APIC ID reported by Linux kernel for x86/x86-64 architecture. 
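+ * The value is parsed as a decimal number; any non-digit character makes the whole field be ignored with a warning.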
+ * Example of APIC ID reported in /proc/cpuinfo: + * + * apicid : 2 + */ +static void parse_apic_id( + const char* apic_start, + const char* apic_end, + struct cpuinfo_x86_linux_processor processor[restrict static 1]) +{ + uint32_t apic_id = 0; + for (const char* digit_ptr = apic_start; digit_ptr != apic_end; digit_ptr++) { + const uint32_t digit = *digit_ptr - '0'; + if (digit >= 10) { + cpuinfo_log_warning("APIC ID %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu", + (int) (apic_end - apic_start), apic_start, + *digit_ptr, (size_t) (digit_ptr - apic_start)); + return; + } + + apic_id = apic_id * 10 + digit; + } + + processor->apic_id = apic_id; + processor->flags |= CPUINFO_LINUX_FLAG_APIC_ID; +} + +struct proc_cpuinfo_parser_state { + uint32_t processor_index; + uint32_t max_processors_count; + struct cpuinfo_x86_linux_processor* processors; + struct cpuinfo_x86_linux_processor dummy_processor; +}; + +/* + * Decode a single line of /proc/cpuinfo information. + * Lines have format [ ]*:[ ] + */ +static bool parse_line( + const char* line_start, + const char* line_end, + struct proc_cpuinfo_parser_state state[restrict static 1], + uint64_t line_number) +{ + /* Empty line. Skip. */ + if (line_start == line_end) { + return true; + } + + /* Search for ':' on the line. */ + const char* separator = line_start; + for (; separator != line_end; separator++) { + if (*separator == ':') { + break; + } + } + /* Skip line if no ':' separator was found. */ + if (separator == line_end) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip trailing spaces in key part. */ + const char* key_end = separator; + for (; key_end != line_start; key_end--) { + if (key_end[-1] != ' ' && key_end[-1] != '\t') { + break; + } + } + /* Skip line if key contains nothing but spaces. */ + if (key_end == line_start) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip leading spaces in value part. */ + const char* value_start = separator + 1; + for (; value_start != line_end; value_start++) { + if (*value_start != ' ') { + break; + } + } + /* Value part contains nothing but spaces. Skip line. 
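+ * (e.g. the "power management" key in /proc/cpuinfo often has an empty value)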
*/ + if (value_start == line_end) { + cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces", + (int) (line_end - line_start), line_start); + return true; + } + + /* Skip trailing spaces in value part (if any) */ + const char* value_end = line_end; + for (; value_end != value_start; value_end--) { + if (value_end[-1] != ' ') { + break; + } + } + + const uint32_t processor_index = state->processor_index; + const uint32_t max_processors_count = state->max_processors_count; + struct cpuinfo_x86_linux_processor* processors = state->processors; + struct cpuinfo_x86_linux_processor* processor = &state->dummy_processor; + if (processor_index < max_processors_count) { + processor = &processors[processor_index]; + } + + const size_t key_length = key_end - line_start; + switch (key_length) { + case 6: + if (memcmp(line_start, "apicid", key_length) == 0) { + parse_apic_id(value_start, value_end, processor); + } else { + goto unknown; + } + break; + case 9: + if (memcmp(line_start, "processor", key_length) == 0) { + const uint32_t new_processor_index = parse_processor_number(value_start, value_end); + if (new_processor_index < processor_index) { + /* Strange: decreasing processor number */ + cpuinfo_log_warning( + "unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } else if (new_processor_index > processor_index + 1) { + /* Strange, but common: skipped processor $(processor_index + 1) */ + cpuinfo_log_info( + "unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo", + new_processor_index, processor_index); + } + if (new_processor_index >= max_processors_count) { + /* Log and ignore processor */ + cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32, + new_processor_index, max_processors_count - 1); + } else { + processors[new_processor_index].flags |= CPUINFO_LINUX_FLAG_PROC_CPUINFO; + } + state->processor_index = new_processor_index; + return true; + } else { + goto unknown; + } + break; + default: + unknown: + cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start); + + } + return true; +} + +bool cpuinfo_x86_linux_parse_proc_cpuinfo( + uint32_t max_processors_count, + struct cpuinfo_x86_linux_processor processors[restrict static max_processors_count]) +{ + struct proc_cpuinfo_parser_state state = { + .processor_index = 0, + .max_processors_count = max_processors_count, + .processors = processors, + }; + return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, + (cpuinfo_line_callback) parse_line, &state); +} diff --git a/dep/cpuinfo/src/x86/linux/init.c b/dep/cpuinfo/src/x86/linux/init.c new file mode 100644 index 000000000..f5657890e --- /dev/null +++ b/dep/cpuinfo/src/x86/linux/init.c @@ -0,0 +1,629 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +static inline uint32_t bit_mask(uint32_t bits) { + return (UINT32_C(1) << bits) - UINT32_C(1); +} + +static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) { + return (bitfield & mask) == mask; +} + +static inline uint32_t min(uint32_t a, uint32_t b) { + return a < b ? 
a : b; +} + +static inline int cmp(uint32_t a, uint32_t b) { + return (a > b) - (a < b); +} + +static int cmp_x86_linux_processor(const void* ptr_a, const void* ptr_b) { + const struct cpuinfo_x86_linux_processor* processor_a = (const struct cpuinfo_x86_linux_processor*) ptr_a; + const struct cpuinfo_x86_linux_processor* processor_b = (const struct cpuinfo_x86_linux_processor*) ptr_b; + + /* Move usable processors towards the start of the array */ + const bool usable_a = bitmask_all(processor_a->flags, CPUINFO_LINUX_FLAG_VALID); + const bool usable_b = bitmask_all(processor_b->flags, CPUINFO_LINUX_FLAG_VALID); + if (usable_a != usable_b) { + return (int) usable_b - (int) usable_a; + } + + /* Compare based on APIC ID (i.e. processor 0 < processor 1) */ + const uint32_t id_a = processor_a->apic_id; + const uint32_t id_b = processor_b->apic_id; + return cmp(id_a, id_b); +} + +static void cpuinfo_x86_count_objects( + uint32_t linux_processors_count, + const struct cpuinfo_x86_linux_processor linux_processors[restrict static linux_processors_count], + const struct cpuinfo_x86_processor processor[restrict static 1], + uint32_t valid_processor_mask, + uint32_t llc_apic_bits, + uint32_t cores_count_ptr[restrict static 1], + uint32_t clusters_count_ptr[restrict static 1], + uint32_t packages_count_ptr[restrict static 1], + uint32_t l1i_count_ptr[restrict static 1], + uint32_t l1d_count_ptr[restrict static 1], + uint32_t l2_count_ptr[restrict static 1], + uint32_t l3_count_ptr[restrict static 1], + uint32_t l4_count_ptr[restrict static 1]) +{ + const uint32_t core_apic_mask = + ~(bit_mask(processor->topology.thread_bits_length) << processor->topology.thread_bits_offset); + const uint32_t package_apic_mask = + core_apic_mask & ~(bit_mask(processor->topology.core_bits_length) << processor->topology.core_bits_offset); + const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits); + const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask; + + uint32_t cores_count = 0, clusters_count = 0, packages_count = 0; + uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0; + uint32_t last_core_id = UINT32_MAX, last_cluster_id = UINT32_MAX, last_package_id = UINT32_MAX; + uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX; + uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX; + for (uint32_t i = 0; i < linux_processors_count; i++) { + if (bitmask_all(linux_processors[i].flags, valid_processor_mask)) { + const uint32_t apic_id = linux_processors[i].apic_id; + cpuinfo_log_debug("APID ID %"PRIu32": system processor %"PRIu32, apic_id, linux_processors[i].linux_id); + + /* All bits of APIC ID except thread ID mask */ + const uint32_t core_id = apic_id & core_apic_mask; + if (core_id != last_core_id) { + last_core_id = core_id; + cores_count++; + } + /* All bits of APIC ID except thread ID and core ID masks */ + const uint32_t package_id = apic_id & package_apic_mask; + if (package_id != last_package_id) { + last_package_id = package_id; + packages_count++; + } + /* Bits of APIC ID which are part of either LLC or package ID mask */ + const uint32_t cluster_id = apic_id & cluster_apic_mask; + if (cluster_id != last_cluster_id) { + last_cluster_id = cluster_id; + clusters_count++; + } + if (processor->cache.l1i.size != 0) { + const uint32_t l1i_id = apic_id & ~bit_mask(processor->cache.l1i.apic_bits); + if (l1i_id != last_l1i_id) { + last_l1i_id = l1i_id; + l1i_count++; + } + } + if (processor->cache.l1d.size != 0) { + const uint32_t l1d_id = 
apic_id & ~bit_mask(processor->cache.l1d.apic_bits); + if (l1d_id != last_l1d_id) { + last_l1d_id = l1d_id; + l1d_count++; + } + } + if (processor->cache.l2.size != 0) { + const uint32_t l2_id = apic_id & ~bit_mask(processor->cache.l2.apic_bits); + if (l2_id != last_l2_id) { + last_l2_id = l2_id; + l2_count++; + } + } + if (processor->cache.l3.size != 0) { + const uint32_t l3_id = apic_id & ~bit_mask(processor->cache.l3.apic_bits); + if (l3_id != last_l3_id) { + last_l3_id = l3_id; + l3_count++; + } + } + if (processor->cache.l4.size != 0) { + const uint32_t l4_id = apic_id & ~bit_mask(processor->cache.l4.apic_bits); + if (l4_id != last_l4_id) { + last_l4_id = l4_id; + l4_count++; + } + } + } + } + *cores_count_ptr = cores_count; + *clusters_count_ptr = clusters_count; + *packages_count_ptr = packages_count; + *l1i_count_ptr = l1i_count; + *l1d_count_ptr = l1d_count; + *l2_count_ptr = l2_count; + *l3_count_ptr = l3_count; + *l4_count_ptr = l4_count; +} + +void cpuinfo_x86_linux_init(void) { + struct cpuinfo_x86_linux_processor* x86_linux_processors = NULL; + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_package* packages = NULL; + const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL; + const struct cpuinfo_core** linux_cpu_to_core_map = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + struct cpuinfo_cache* l4 = NULL; + + const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count(); + cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count); + + const uint32_t max_possible_processors_count = 1 + + cpuinfo_linux_get_max_possible_processor(max_processors_count); + cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count); + const uint32_t max_present_processors_count = 1 + + cpuinfo_linux_get_max_present_processor(max_processors_count); + cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count); + + uint32_t valid_processor_mask = 0; + uint32_t x86_linux_processors_count = max_processors_count; + if (max_present_processors_count != 0) { + x86_linux_processors_count = min(x86_linux_processors_count, max_present_processors_count); + valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT; + } else { + valid_processor_mask = CPUINFO_LINUX_FLAG_PROC_CPUINFO; + } + if (max_possible_processors_count != 0) { + x86_linux_processors_count = min(x86_linux_processors_count, max_possible_processors_count); + valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE; + } + + x86_linux_processors = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor)); + if (x86_linux_processors == NULL) { + cpuinfo_log_error( + "failed to allocate %zu bytes for descriptions of %"PRIu32" x86 logical processors", + x86_linux_processors_count * sizeof(struct cpuinfo_x86_linux_processor), + x86_linux_processors_count); + return; + } + + if (max_possible_processors_count != 0) { + cpuinfo_linux_detect_possible_processors( + x86_linux_processors_count, &x86_linux_processors->flags, + sizeof(struct cpuinfo_x86_linux_processor), + CPUINFO_LINUX_FLAG_POSSIBLE); + } + + if (max_present_processors_count != 0) { + cpuinfo_linux_detect_present_processors( + x86_linux_processors_count, &x86_linux_processors->flags, + sizeof(struct cpuinfo_x86_linux_processor), + CPUINFO_LINUX_FLAG_PRESENT); + } + + 
if (!cpuinfo_x86_linux_parse_proc_cpuinfo(x86_linux_processors_count, x86_linux_processors)) { + cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo"); + return; + } + + for (uint32_t i = 0; i < x86_linux_processors_count; i++) { + if (bitmask_all(x86_linux_processors[i].flags, valid_processor_mask)) { + x86_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID; + } + } + + struct cpuinfo_x86_processor x86_processor; + memset(&x86_processor, 0, sizeof(x86_processor)); + cpuinfo_x86_init_processor(&x86_processor); + char brand_string[48]; + cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string); + + uint32_t processors_count = 0; + for (uint32_t i = 0; i < x86_linux_processors_count; i++) { + if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + x86_linux_processors[i].linux_id = i; + processors_count++; + } + } + + qsort(x86_linux_processors, x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor), + cmp_x86_linux_processor); + + processors = calloc(processors_count, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + processors_count * sizeof(struct cpuinfo_processor), processors_count); + goto cleanup; + } + + uint32_t llc_apic_bits = 0; + if (x86_processor.cache.l4.size != 0) { + llc_apic_bits = x86_processor.cache.l4.apic_bits; + } else if (x86_processor.cache.l3.size != 0) { + llc_apic_bits = x86_processor.cache.l3.apic_bits; + } else if (x86_processor.cache.l2.size != 0) { + llc_apic_bits = x86_processor.cache.l2.apic_bits; + } else if (x86_processor.cache.l1d.size != 0) { + llc_apic_bits = x86_processor.cache.l1d.apic_bits; + } + uint32_t packages_count = 0, clusters_count = 0, cores_count = 0; + uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0; + cpuinfo_x86_count_objects( + x86_linux_processors_count, x86_linux_processors, &x86_processor, valid_processor_mask, llc_apic_bits, + &cores_count, &clusters_count, &packages_count, &l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count); + + cpuinfo_log_debug("detected %"PRIu32" cores", cores_count); + cpuinfo_log_debug("detected %"PRIu32" clusters", clusters_count); + cpuinfo_log_debug("detected %"PRIu32" packages", packages_count); + cpuinfo_log_debug("detected %"PRIu32" L1I caches", l1i_count); + cpuinfo_log_debug("detected %"PRIu32" L1D caches", l1d_count); + cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count); + cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count); + cpuinfo_log_debug("detected %"PRIu32" L4 caches", l4_count); + + linux_cpu_to_processor_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_processor*)); + if (linux_cpu_to_processor_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for mapping entries of %"PRIu32" logical processors", + x86_linux_processors_count * sizeof(struct cpuinfo_processor*), + x86_linux_processors_count); + goto cleanup; + } + + linux_cpu_to_core_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_core*)); + if (linux_cpu_to_core_map == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for mapping entries of %"PRIu32" cores", + x86_linux_processors_count * sizeof(struct cpuinfo_core*), + x86_linux_processors_count); + goto cleanup; + } + + cores = calloc(cores_count, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + 
cores_count * sizeof(struct cpuinfo_core), cores_count); + goto cleanup; + } + + clusters = calloc(clusters_count, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + clusters_count * sizeof(struct cpuinfo_cluster), clusters_count); + goto cleanup; + } + + packages = calloc(packages_count, sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages", + packages_count * sizeof(struct cpuinfo_package), packages_count); + goto cleanup; + } + + if (l1i_count != 0) { + l1i = calloc(l1i_count, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + l1i_count * sizeof(struct cpuinfo_cache), l1i_count); + goto cleanup; + } + } + if (l1d_count != 0) { + l1d = calloc(l1d_count, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + l1d_count * sizeof(struct cpuinfo_cache), l1d_count); + goto cleanup; + } + } + if (l2_count != 0) { + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + } + if (l3_count != 0) { + l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + goto cleanup; + } + } + if (l4_count != 0) { + l4 = calloc(l4_count, sizeof(struct cpuinfo_cache)); + if (l4 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches", + l4_count * sizeof(struct cpuinfo_cache), l4_count); + goto cleanup; + } + } + + const uint32_t core_apic_mask = + ~(bit_mask(x86_processor.topology.thread_bits_length) << x86_processor.topology.thread_bits_offset); + const uint32_t package_apic_mask = + core_apic_mask & ~(bit_mask(x86_processor.topology.core_bits_length) << x86_processor.topology.core_bits_offset); + const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits); + const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask; + + uint32_t processor_index = UINT32_MAX, core_index = UINT32_MAX, cluster_index = UINT32_MAX, package_index = UINT32_MAX; + uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX, l4_index = UINT32_MAX; + uint32_t cluster_id = 0, core_id = 0, smt_id = 0; + uint32_t last_apic_core_id = UINT32_MAX, last_apic_cluster_id = UINT32_MAX, last_apic_package_id = UINT32_MAX; + uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX; + uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX; + for (uint32_t i = 0; i < x86_linux_processors_count; i++) { + if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) { + const uint32_t apic_id = x86_linux_processors[i].apic_id; + processor_index++; + smt_id++; + + /* All bits of APIC ID except thread ID mask */ + const uint32_t apid_core_id = apic_id & core_apic_mask; + if (apid_core_id != last_apic_core_id) { + core_index++; + core_id++; + smt_id = 0; + } + /* Bits of APIC ID which are part of either LLC or package ID mask */ + const uint32_t apic_cluster_id = apic_id & 
cluster_apic_mask; + if (apic_cluster_id != last_apic_cluster_id) { + cluster_index++; + cluster_id++; + } + /* All bits of APIC ID except thread ID and core ID masks */ + const uint32_t apic_package_id = apic_id & package_apic_mask; + if (apic_package_id != last_apic_package_id) { + package_index++; + core_id = 0; + cluster_id = 0; + } + + /* Initialize logical processor object */ + processors[processor_index].smt_id = smt_id; + processors[processor_index].core = cores + core_index; + processors[processor_index].cluster = clusters + cluster_index; + processors[processor_index].package = packages + package_index; + processors[processor_index].linux_id = x86_linux_processors[i].linux_id; + processors[processor_index].apic_id = x86_linux_processors[i].apic_id; + + if (apid_core_id != last_apic_core_id) { + /* new core */ + cores[core_index] = (struct cpuinfo_core) { + .processor_start = processor_index, + .processor_count = 1, + .core_id = core_id, + .cluster = clusters + cluster_index, + .package = packages + package_index, + .vendor = x86_processor.vendor, + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + }; + clusters[cluster_index].core_count += 1; + packages[package_index].core_count += 1; + last_apic_core_id = apid_core_id; + } else { + /* another logical processor on the same core */ + cores[core_index].processor_count++; + } + + if (apic_cluster_id != last_apic_cluster_id) { + /* new cluster */ + clusters[cluster_index].processor_start = processor_index; + clusters[cluster_index].processor_count = 1; + clusters[cluster_index].core_start = core_index; + clusters[cluster_index].cluster_id = cluster_id; + clusters[cluster_index].package = packages + package_index; + clusters[cluster_index].vendor = x86_processor.vendor; + clusters[cluster_index].uarch = x86_processor.uarch; + clusters[cluster_index].cpuid = x86_processor.cpuid; + packages[package_index].cluster_count += 1; + last_apic_cluster_id = apic_cluster_id; + } else { + /* another logical processor on the same cluster */ + clusters[cluster_index].processor_count++; + } + + if (apic_package_id != last_apic_package_id) { + /* new package */ + packages[package_index].processor_start = processor_index; + packages[package_index].processor_count = 1; + packages[package_index].core_start = core_index; + packages[package_index].cluster_start = cluster_index; + cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[package_index].name); + last_apic_package_id = apic_package_id; + } else { + /* another logical processor on the same package */ + packages[package_index].processor_count++; + } + + linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index; + linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index; + + if (x86_processor.cache.l1i.size != 0) { + const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits); + processors[i].cache.l1i = &l1i[l1i_index]; + if (l1i_id != last_l1i_id) { + /* new cache */ + last_l1i_id = l1i_id; + l1i[++l1i_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l1i.size, + .associativity = x86_processor.cache.l1i.associativity, + .sets = x86_processor.cache.l1i.sets, + .partitions = x86_processor.cache.l1i.partitions, + .line_size = x86_processor.cache.l1i.line_size, + .flags = x86_processor.cache.l1i.flags, + .processor_start = processor_index, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l1i[l1i_index].processor_count += 1; + } + 
processors[i].cache.l1i = &l1i[l1i_index]; + } else { + /* reset cache id */ + last_l1i_id = UINT32_MAX; + } + if (x86_processor.cache.l1d.size != 0) { + const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits); + processors[i].cache.l1d = &l1d[l1d_index]; + if (l1d_id != last_l1d_id) { + /* new cache */ + last_l1d_id = l1d_id; + l1d[++l1d_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l1d.size, + .associativity = x86_processor.cache.l1d.associativity, + .sets = x86_processor.cache.l1d.sets, + .partitions = x86_processor.cache.l1d.partitions, + .line_size = x86_processor.cache.l1d.line_size, + .flags = x86_processor.cache.l1d.flags, + .processor_start = processor_index, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l1d[l1d_index].processor_count += 1; + } + processors[i].cache.l1d = &l1d[l1d_index]; + } else { + /* reset cache id */ + last_l1d_id = UINT32_MAX; + } + if (x86_processor.cache.l2.size != 0) { + const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits); + processors[i].cache.l2 = &l2[l2_index]; + if (l2_id != last_l2_id) { + /* new cache */ + last_l2_id = l2_id; + l2[++l2_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l2.size, + .associativity = x86_processor.cache.l2.associativity, + .sets = x86_processor.cache.l2.sets, + .partitions = x86_processor.cache.l2.partitions, + .line_size = x86_processor.cache.l2.line_size, + .flags = x86_processor.cache.l2.flags, + .processor_start = processor_index, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l2[l2_index].processor_count += 1; + } + processors[i].cache.l2 = &l2[l2_index]; + } else { + /* reset cache id */ + last_l2_id = UINT32_MAX; + } + if (x86_processor.cache.l3.size != 0) { + const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits); + processors[i].cache.l3 = &l3[l3_index]; + if (l3_id != last_l3_id) { + /* new cache */ + last_l3_id = l3_id; + l3[++l3_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l3.size, + .associativity = x86_processor.cache.l3.associativity, + .sets = x86_processor.cache.l3.sets, + .partitions = x86_processor.cache.l3.partitions, + .line_size = x86_processor.cache.l3.line_size, + .flags = x86_processor.cache.l3.flags, + .processor_start = processor_index, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l3[l3_index].processor_count += 1; + } + processors[i].cache.l3 = &l3[l3_index]; + } else { + /* reset cache id */ + last_l3_id = UINT32_MAX; + } + if (x86_processor.cache.l4.size != 0) { + const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits); + processors[i].cache.l4 = &l4[l4_index]; + if (l4_id != last_l4_id) { + /* new cache */ + last_l4_id = l4_id; + l4[++l4_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l4.size, + .associativity = x86_processor.cache.l4.associativity, + .sets = x86_processor.cache.l4.sets, + .partitions = x86_processor.cache.l4.partitions, + .line_size = x86_processor.cache.l4.line_size, + .flags = x86_processor.cache.l4.flags, + .processor_start = processor_index, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l4[l4_index].processor_count += 1; + } + processors[i].cache.l4 = &l4[l4_index]; + } else { + /* reset cache id */ + last_l4_id = UINT32_MAX; + } + } + } + + /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + 
cpuinfo_clusters = clusters; + cpuinfo_packages = packages; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; + cpuinfo_cache[cpuinfo_cache_level_4] = l4; + + cpuinfo_processors_count = processors_count; + cpuinfo_cores_count = cores_count; + cpuinfo_clusters_count = clusters_count; + cpuinfo_packages_count = packages_count; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + .processor_count = processors_count, + .core_count = cores_count, + }; + + cpuinfo_linux_cpu_max = x86_linux_processors_count; + cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map; + cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map; + + __sync_synchronize(); + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + packages = NULL; + l1i = l1d = l2 = l3 = l4 = NULL; + linux_cpu_to_processor_map = NULL; + linux_cpu_to_core_map = NULL; + +cleanup: + free(x86_linux_processors); + free(processors); + free(cores); + free(clusters); + free(packages); + free(l1i); + free(l1d); + free(l2); + free(l3); + free(l4); + free(linux_cpu_to_processor_map); + free(linux_cpu_to_core_map); +} diff --git a/dep/cpuinfo/src/x86/mach/init.c b/dep/cpuinfo/src/x86/mach/init.c new file mode 100644 index 000000000..b44d3adf5 --- /dev/null +++ b/dep/cpuinfo/src/x86/mach/init.c @@ -0,0 +1,356 @@ +#include +#include +#include + +#include +#include +#include +#include +#include + + +static inline uint32_t max(uint32_t a, uint32_t b) { + return a > b ? 
a : b; +} + +static inline uint32_t bit_mask(uint32_t bits) { + return (UINT32_C(1) << bits) - UINT32_C(1); +} + +void cpuinfo_x86_mach_init(void) { + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_package* packages = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + struct cpuinfo_cache* l4 = NULL; + + struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology(); + processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads); + goto cleanup; + } + cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores); + goto cleanup; + } + /* On x86 cluster of cores is a physical package */ + clusters = calloc(mach_topology.packages, sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + mach_topology.packages * sizeof(struct cpuinfo_cluster), mach_topology.packages); + goto cleanup; + } + packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages", + mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages); + goto cleanup; + } + + struct cpuinfo_x86_processor x86_processor; + memset(&x86_processor, 0, sizeof(x86_processor)); + cpuinfo_x86_init_processor(&x86_processor); + char brand_string[48]; + cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string); + + const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores; + const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages; + const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages; + for (uint32_t i = 0; i < mach_topology.packages; i++) { + clusters[i] = (struct cpuinfo_cluster) { + .processor_start = i * threads_per_package, + .processor_count = threads_per_package, + .core_start = i * cores_per_package, + .core_count = cores_per_package, + .cluster_id = 0, + .package = packages + i, + .vendor = x86_processor.vendor, + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + }; + packages[i].processor_start = i * threads_per_package; + packages[i].processor_count = threads_per_package; + packages[i].core_start = i * cores_per_package; + packages[i].core_count = cores_per_package; + packages[i].cluster_start = i; + packages[i].cluster_count = 1; + cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name); + } + for (uint32_t i = 0; i < mach_topology.cores; i++) { + cores[i] = (struct cpuinfo_core) { + .processor_start = i * threads_per_core, + .processor_count = threads_per_core, + .core_id = i % cores_per_package, + .cluster = clusters + i / cores_per_package, + .package = packages + i / cores_per_package, + .vendor = x86_processor.vendor, + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + }; + } + for (uint32_t i = 0; i < 
mach_topology.threads; i++) { + const uint32_t smt_id = i % threads_per_core; + const uint32_t core_id = i / threads_per_core; + const uint32_t package_id = i / threads_per_package; + + /* Reconstruct APIC IDs from topology components */ + const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length); + const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length); + const uint32_t package_bits_offset = max( + x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length, + x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length); + const uint32_t apic_id = + ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) | + ((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) | + (package_id << package_bits_offset); + cpuinfo_log_debug("reconstructed APIC ID 0x%08"PRIx32" for thread %"PRIu32, apic_id, i); + + processors[i].smt_id = smt_id; + processors[i].core = cores + i / threads_per_core; + processors[i].cluster = clusters + i / threads_per_package; + processors[i].package = packages + i / threads_per_package; + processors[i].apic_id = apic_id; + } + + uint32_t threads_per_l1 = 0, l1_count = 0; + if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) { + threads_per_l1 = mach_topology.threads_per_cache[1]; + if (threads_per_l1 == 0) { + /* Assume that threads on the same core share L1 */ + threads_per_l1 = mach_topology.threads / mach_topology.cores; + cpuinfo_log_warning("Mach kernel did not report number of threads sharing L1 cache; assume %"PRIu32, + threads_per_l1); + } + l1_count = mach_topology.threads / threads_per_l1; + cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count); + } + + uint32_t threads_per_l2 = 0, l2_count = 0; + if (x86_processor.cache.l2.size != 0) { + threads_per_l2 = mach_topology.threads_per_cache[2]; + if (threads_per_l2 == 0) { + if (x86_processor.cache.l3.size != 0) { + /* This is not a last-level cache; assume that threads on the same core share L2 */ + threads_per_l2 = mach_topology.threads / mach_topology.cores; + } else { + /* This is a last-level cache; assume that threads on the same package share L2 */ + threads_per_l2 = mach_topology.threads / mach_topology.packages; + } + cpuinfo_log_warning("Mach kernel did not report number of threads sharing L2 cache; assume %"PRIu32, + threads_per_l2); + } + l2_count = mach_topology.threads / threads_per_l2; + cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count); + } + + uint32_t threads_per_l3 = 0, l3_count = 0; + if (x86_processor.cache.l3.size != 0) { + threads_per_l3 = mach_topology.threads_per_cache[3]; + if (threads_per_l3 == 0) { + /* + * Assume that threads on the same package share L3. + * However, is it not necessarily the last-level cache (there may be L4 cache as well) + */ + threads_per_l3 = mach_topology.threads / mach_topology.packages; + cpuinfo_log_warning("Mach kernel did not report number of threads sharing L3 cache; assume %"PRIu32, + threads_per_l3); + } + l3_count = mach_topology.threads / threads_per_l3; + cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count); + } + + uint32_t threads_per_l4 = 0, l4_count = 0; + if (x86_processor.cache.l4.size != 0) { + threads_per_l4 = mach_topology.threads_per_cache[4]; + if (threads_per_l4 == 0) { + /* + * Assume that all threads share this L4. 
+ * As of now, L4 cache exists only on notebook x86 CPUs, which are single-package, + * but multi-socket systems could have shared L4 (like on IBM POWER8). + */ + threads_per_l4 = mach_topology.threads; + cpuinfo_log_warning("Mach kernel did not report number of threads sharing L4 cache; assume %"PRIu32, + threads_per_l4); + } + l4_count = mach_topology.threads / threads_per_l4; + cpuinfo_log_debug("detected %"PRIu32" L4 caches", l4_count); + } + + if (x86_processor.cache.l1i.size != 0) { + l1i = calloc(l1_count, sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + l1_count * sizeof(struct cpuinfo_cache), l1_count); + return; + } + for (uint32_t c = 0; c < l1_count; c++) { + l1i[c] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l1i.size, + .associativity = x86_processor.cache.l1i.associativity, + .sets = x86_processor.cache.l1i.sets, + .partitions = x86_processor.cache.l1i.partitions, + .line_size = x86_processor.cache.l1i.line_size, + .flags = x86_processor.cache.l1i.flags, + .processor_start = c * threads_per_l1, + .processor_count = threads_per_l1, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l1i = &l1i[t / threads_per_l1]; + } + } + + if (x86_processor.cache.l1d.size != 0) { + l1d = calloc(l1_count, sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + l1_count * sizeof(struct cpuinfo_cache), l1_count); + return; + } + for (uint32_t c = 0; c < l1_count; c++) { + l1d[c] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l1d.size, + .associativity = x86_processor.cache.l1d.associativity, + .sets = x86_processor.cache.l1d.sets, + .partitions = x86_processor.cache.l1d.partitions, + .line_size = x86_processor.cache.l1d.line_size, + .flags = x86_processor.cache.l1d.flags, + .processor_start = c * threads_per_l1, + .processor_count = threads_per_l1, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l1d = &l1d[t / threads_per_l1]; + } + } + + if (l2_count != 0) { + l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + return; + } + for (uint32_t c = 0; c < l2_count; c++) { + l2[c] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l2.size, + .associativity = x86_processor.cache.l2.associativity, + .sets = x86_processor.cache.l2.sets, + .partitions = x86_processor.cache.l2.partitions, + .line_size = x86_processor.cache.l2.line_size, + .flags = x86_processor.cache.l2.flags, + .processor_start = c * threads_per_l2, + .processor_count = threads_per_l2, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l2 = &l2[t / threads_per_l2]; + } + } + + if (l3_count != 0) { + l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + return; + } + for (uint32_t c = 0; c < l3_count; c++) { + l3[c] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l3.size, + .associativity = x86_processor.cache.l3.associativity, + .sets = x86_processor.cache.l3.sets, + .partitions = x86_processor.cache.l3.partitions, + .line_size = x86_processor.cache.l3.line_size, + .flags = 
x86_processor.cache.l3.flags, + .processor_start = c * threads_per_l3, + .processor_count = threads_per_l3, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l3 = &l3[t / threads_per_l3]; + } + } + + if (l4_count != 0) { + l4 = calloc(l4_count, sizeof(struct cpuinfo_cache)); + if (l4 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches", + l4_count * sizeof(struct cpuinfo_cache), l4_count); + return; + } + for (uint32_t c = 0; c < l4_count; c++) { + l4[c] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l4.size, + .associativity = x86_processor.cache.l4.associativity, + .sets = x86_processor.cache.l4.sets, + .partitions = x86_processor.cache.l4.partitions, + .line_size = x86_processor.cache.l4.line_size, + .flags = x86_processor.cache.l4.flags, + .processor_start = c * threads_per_l4, + .processor_count = threads_per_l4, + }; + } + for (uint32_t t = 0; t < mach_topology.threads; t++) { + processors[t].cache.l4 = &l4[t / threads_per_l4]; + } + } + + /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = packages; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; + cpuinfo_cache[cpuinfo_cache_level_4] = l4; + + cpuinfo_processors_count = mach_topology.threads; + cpuinfo_cores_count = mach_topology.cores; + cpuinfo_clusters_count = mach_topology.packages; + cpuinfo_packages_count = mach_topology.packages; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + .processor_count = mach_topology.threads, + .core_count = mach_topology.cores, + }; + + __sync_synchronize(); + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + packages = NULL; + l1i = l1d = l2 = l3 = l4 = NULL; + +cleanup: + free(processors); + free(cores); + free(clusters); + free(packages); + free(l1i); + free(l1d); + free(l2); + free(l3); + free(l4); +} diff --git a/dep/cpuinfo/src/x86/mockcpuid.c b/dep/cpuinfo/src/x86/mockcpuid.c new file mode 100644 index 000000000..2631f09ba --- /dev/null +++ b/dep/cpuinfo/src/x86/mockcpuid.c @@ -0,0 +1,70 @@ +#include +#include + +#if !CPUINFO_MOCK + #error This file should be built only in mock mode +#endif + +#include + + +static struct cpuinfo_mock_cpuid* cpuinfo_mock_cpuid_data = NULL; +static uint32_t cpuinfo_mock_cpuid_entries = 0; +static uint32_t cpuinfo_mock_cpuid_leaf4_iteration = 0; + +void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries) { + cpuinfo_mock_cpuid_data = dump; + cpuinfo_mock_cpuid_entries = entries; +}; + +void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]) { + if (eax != 4) { + cpuinfo_mock_cpuid_leaf4_iteration = 0; + } + if (cpuinfo_mock_cpuid_data != NULL && cpuinfo_mock_cpuid_entries != 0) { + if (eax == 4) { + uint32_t skip_entries = cpuinfo_mock_cpuid_leaf4_iteration; + for (uint32_t i = 0; i < cpuinfo_mock_cpuid_entries; i++) { + if (eax == 
cpuinfo_mock_cpuid_data[i].input_eax) { + if (skip_entries-- == 0) { + regs[0] = cpuinfo_mock_cpuid_data[i].eax; + regs[1] = cpuinfo_mock_cpuid_data[i].ebx; + regs[2] = cpuinfo_mock_cpuid_data[i].ecx; + regs[3] = cpuinfo_mock_cpuid_data[i].edx; + cpuinfo_mock_cpuid_leaf4_iteration++; + return; + } + } + } + } else { + for (uint32_t i = 0; i < cpuinfo_mock_cpuid_entries; i++) { + if (eax == cpuinfo_mock_cpuid_data[i].input_eax) { + regs[0] = cpuinfo_mock_cpuid_data[i].eax; + regs[1] = cpuinfo_mock_cpuid_data[i].ebx; + regs[2] = cpuinfo_mock_cpuid_data[i].ecx; + regs[3] = cpuinfo_mock_cpuid_data[i].edx; + return; + } + } + } + } + regs[0] = regs[1] = regs[2] = regs[3] = 0; +} + +void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]) { + cpuinfo_mock_cpuid_leaf4_iteration = 0; + if (cpuinfo_mock_cpuid_data != NULL && cpuinfo_mock_cpuid_entries != 0) { + for (uint32_t i = 0; i < cpuinfo_mock_cpuid_entries; i++) { + if (eax == cpuinfo_mock_cpuid_data[i].input_eax && + ecx == cpuinfo_mock_cpuid_data[i].input_ecx) + { + regs[0] = cpuinfo_mock_cpuid_data[i].eax; + regs[1] = cpuinfo_mock_cpuid_data[i].ebx; + regs[2] = cpuinfo_mock_cpuid_data[i].ecx; + regs[3] = cpuinfo_mock_cpuid_data[i].edx; + return; + } + } + } + regs[0] = regs[1] = regs[2] = regs[3] = 0; +} diff --git a/dep/cpuinfo/src/x86/name.c b/dep/cpuinfo/src/x86/name.c new file mode 100644 index 000000000..82d4457d6 --- /dev/null +++ b/dep/cpuinfo/src/x86/name.c @@ -0,0 +1,708 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + + +/* The state of the parser to be preserved between parsing different tokens. */ +struct parser_state { + /* + * Pointer to the start of the previous token if it is "model". + * NULL if previous token is not "model". + */ + char* context_model; + /* + * Pointer to the start of the previous token if it is a single-uppercase-letter token. + * NULL if previous token is anything different. + */ + char* context_upper_letter; + /* + * Pointer to the start of the previous token if it is "Dual". + * NULL if previous token is not "Dual". + */ + char* context_dual; + /* + * Pointer to the start of the previous token if it is "Core", "Dual-Core", "QuadCore", etc. + * NULL if previous token is anything different. + */ + char* context_core; + /* + * Pointer to the start of the previous token if it is "Eng" or "Engineering", etc. + * NULL if previous token is anything different. + */ + char* context_engineering; + /* + * Pointer to the '@' symbol in the brand string (separates frequency specification). + * NULL if there is no '@' symbol. + */ + char* frequency_separator; + /* Indicates whether the brand string (after transformations) contains frequency. */ + bool frequency_token; + /* Indicates whether the processor is of Xeon family (contains "Xeon" substring). */ + bool xeon; + /* Indicates whether the processor model number was already parsed. */ + bool parsed_model_number; + /* Indicates whether the processor is an engineering sample (contains "Engineering Sample" or "Eng Sample" substrings). */ + bool engineering_sample; +}; + +/** @brief Resets information about the previous token. Keeps all other state information. */ +static void reset_context(struct parser_state* state) { + state->context_model = NULL; + state->context_upper_letter = NULL; + state->context_dual = NULL; + state->context_core = NULL; +} + +/** + * @brief Overwrites the supplied string with space characters if it exactly matches the given string. 
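+ *        For example, erase_matching(token, 3, "CPU") overwrites a matching "CPU" token with three spaces and returns true.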
+ * @param string The string to be compared against other string, and erased in case of matching. + * @param length The length of the two string to be compared against each other. + * @param target The string to compare against. + * @retval true If the two strings match and the first supplied string was erased (overwritten with space characters). + * @retval false If the two strings are different and the first supplied string remained unchanged. + */ +static inline bool erase_matching(char* string, size_t length, const char* target) { + const bool match = memcmp(string, target, length) == 0; + if (match) { + memset(string, ' ', length); + } + return match; +} + +/** + * @brief Checks if the supplied ASCII character is an uppercase latin letter. + * @param character The character to analyse. + * @retval true If the supplied character is an uppercase latin letter ('A' to 'Z'). + * @retval false If the supplied character is anything different. + */ +static inline bool is_upper_letter(char character) { + return (uint32_t) (character - 'A') <= (uint32_t)('Z' - 'A'); +} + +/** + * @brief Checks if the supplied ASCII character is a digit. + * @param character The character to analyse. + * @retval true If the supplied character is a digit ('0' to '9'). + * @retval false If the supplied character is anything different. + */ +static inline bool is_digit(char character) { + return (uint32_t) (character - '0') < UINT32_C(10); +} + +static inline bool is_zero_number(const char* token_start, const char* token_end) { + for (const char* char_ptr = token_start; char_ptr != token_end; char_ptr++) { + if (*char_ptr != '0') { + return false; + } + } + return true; +} + +static inline bool is_space(const char* token_start, const char* token_end) { + for (const char* char_ptr = token_start; char_ptr != token_end; char_ptr++) { + if (*char_ptr != ' ') { + return false; + } + } + return true; +} + +static inline bool is_number(const char* token_start, const char* token_end) { + for (const char* char_ptr = token_start; char_ptr != token_end; char_ptr++) { + if (!is_digit(*char_ptr)) { + return false; + } + } + return true; +} + +static inline bool is_model_number(const char* token_start, const char* token_end) { + for (const char* char_ptr = token_start + 1; char_ptr < token_end; char_ptr++) { + if (is_digit(char_ptr[-1]) && is_digit(char_ptr[0])) { + return true; + } + } + return false; +} + +static inline bool is_frequency(const char* token_start, const char* token_end) { + const size_t token_length = (size_t) (token_end - token_start); + if (token_length > 3 && token_end[-2] == 'H' && token_end[-1] == 'z') { + switch (token_end[-3]) { + case 'K': + case 'M': + case 'G': + return true; + } + } + return false; +} + +/** + * @warning Input and output tokens can overlap + */ +static inline char* move_token(const char* token_start, const char* token_end, char* output_ptr) { + const size_t token_length = (size_t) (token_end - token_start); + memmove(output_ptr, token_start, token_length); + return output_ptr + token_length; +} + +static bool transform_token(char* token_start, char* token_end, struct parser_state* state) { + const struct parser_state previousState = *state; + reset_context(state); + + size_t token_length = (size_t) (token_end - token_start); + + if (state->frequency_separator != NULL) { + if (token_start > state->frequency_separator) { + if (state->parsed_model_number) { + memset(token_start, ' ', token_length); + } + } + } + + + /* Early AMD and Cyrix processors have "tm" suffix for trademark, e.g. 
+ * "AMD-K6tm w/ multimedia extensions" + * "Cyrix MediaGXtm MMXtm Enhanced" + */ + if (token_length > 2) { + const char context_char = token_end[-3]; + if (is_digit(context_char) || is_upper_letter(context_char)) { + if (erase_matching(token_end - 2, 2, "tm")) { + token_end -= 2; + token_length -= 2; + } + } + } + if (token_length > 4) { + /* Some early AMD CPUs have "AMD-" at the beginning, e.g. + * "AMD-K5(tm) Processor" + * "AMD-K6tm w/ multimedia extensions" + * "AMD-K6(tm) 3D+ Processor" + * "AMD-K6(tm)-III Processor" + */ + if (erase_matching(token_start, 4, "AMD-")) { + token_start += 4; + token_length -= 4; + } + } + switch (token_length) { + case 1: + /* + * On some Intel processors there is a space between the first letter of + * the name and the number after it, e.g. + * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz" + * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz" + * We want to merge these parts together, in reverse order, i.e. "X 990" -> "990X", "820" -> "820Q" + */ + if (is_upper_letter(token_start[0])) { + state->context_upper_letter = token_start; + return true; + } + break; + case 2: + /* Erase everything after "w/" in "AMD-K6tm w/ multimedia extensions" */ + if (erase_matching(token_start, token_length, "w/")) { + return false; + } + /* + * Intel Xeon processors since Ivy Bridge use versions, e.g. + * "Intel Xeon E3-1230 v2" + * Some processor branch strings report them as "V", others report as "v". + * Normalize the former (upper-case) to the latter (lower-case) version + */ + if (token_start[0] == 'V' && is_digit(token_start[1])) { + token_start[0] = 'v'; + return true; + } + break; + case 3: + /* + * Erase "CPU" in brand string on Intel processors, e.g. + * "Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz" + * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz" + * "Intel(R) Atom(TM) CPU Z2760 @ 1.80GHz" + */ + if (erase_matching(token_start, token_length, "CPU")) { + return true; + } + /* + * Erase everywhing after "SOC" on AMD System-on-Chips, e.g. + * "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0" + */ + if (erase_matching(token_start, token_length, "SOC")) { + return false; + } + /* + * Erase "AMD" in brand string on AMD processors, e.g. + * "AMD Athlon(tm) Processor" + * "AMD Engineering Sample" + * "Quad-Core AMD Opteron(tm) Processor 2344 HE" + */ + if (erase_matching(token_start, token_length, "AMD")) { + return true; + } + /* + * Erase "VIA" in brand string on VIA processors, e.g. + * "VIA C3 Ezra" + * "VIA C7-M Processor 1200MHz" + * "VIA Nano L3050@1800MHz" + */ + if (erase_matching(token_start, token_length, "VIA")) { + return true; + } + /* Erase "IDT" in brand string on early Centaur processors, e.g. "IDT WinChip 2-3D" */ + if (erase_matching(token_start, token_length, "IDT")) { + return true; + } + /* + * Erase everything starting with "MMX" in + * "Cyrix MediaGXtm MMXtm Enhanced" ("tm" suffix is removed by this point) + */ + if (erase_matching(token_start, token_length, "MMX")) { + return false; + } + /* + * Erase everything starting with "APU" on AMD processors, e.g. + * "AMD A10-4600M APU with Radeon(tm) HD Graphics" + * "AMD A10-7850K APU with Radeon(TM) R7 Graphics" + * "AMD A6-6310 APU with AMD Radeon R4 Graphics" + */ + if (erase_matching(token_start, token_length, "APU")) { + return false; + } + /* + * Remember to discard string if it contains "Eng Sample", + * e.g. 
"Eng Sample, ZD302046W4K43_36/30/20_2/8_A" + */ + if (memcmp(token_start, "Eng", token_length) == 0) { + state->context_engineering = token_start; + } + break; + case 4: + /* Remember to erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */ + if (memcmp(token_start, "Dual", token_length) == 0) { + state->context_dual = token_start; + } + /* Remember if the processor is on Xeon family */ + if (memcmp(token_start, "Xeon", token_length) == 0) { + state->xeon = true; + } + /* Erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */ + if (previousState.context_dual != NULL) { + if (memcmp(token_start, "Core", token_length) == 0) { + memset(previousState.context_dual, ' ', (size_t) (token_end - previousState.context_dual)); + state->context_core = token_end; + return true; + } + } + break; + case 5: + /* + * Erase "Intel" in brand string on Intel processors, e.g. + * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz" + * "Intel(R) Atom(TM) CPU D2700 @ 2.13GHz" + * "Genuine Intel(R) processor 800MHz" + */ + if (erase_matching(token_start, token_length, "Intel")) { + return true; + } + /* + * Erase "Cyrix" in brand string on Cyrix processors, e.g. + * "Cyrix MediaGXtm MMXtm Enhanced" + */ + if (erase_matching(token_start, token_length, "Cyrix")) { + return true; + } + /* + * Erase everything following "Geode" (but not "Geode" token itself) on Geode processors, e.g. + * "Geode(TM) Integrated Processor by AMD PCS" + * "Geode(TM) Integrated Processor by National Semi" + */ + if (memcmp(token_start, "Geode", token_length) == 0) { + return false; + } + /* Remember to erase "model unknown" in "AMD Processor model unknown" */ + if (memcmp(token_start, "model", token_length) == 0) { + state->context_model = token_start; + return true; + } + break; + case 6: + /* + * Erase everything starting with "Radeon" or "RADEON" on AMD APUs, e.g. + * "A8-7670K Radeon R7, 10 Compute Cores 4C+6G" + * "FX-8800P Radeon R7, 12 Compute Cores 4C+8G" + * "A12-9800 RADEON R7, 12 COMPUTE CORES 4C+8G" + * "A9-9410 RADEON R5, 5 COMPUTE CORES 2C+3G" + */ + if (erase_matching(token_start, token_length, "Radeon") || erase_matching(token_start, token_length, "RADEON")) { + return false; + } + /* + * Erase "Mobile" when it is not part of the processor name, + * e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82" + */ + if (previousState.context_core != NULL) { + if (erase_matching(token_start, token_length, "Mobile")) { + return true; + } + } + /* Erase "family" in "Intel(R) Pentium(R) III CPU family 1266MHz" */ + if (erase_matching(token_start, token_length, "family")) { + return true; + } + /* Discard the string if it contains "Engineering Sample" */ + if (previousState.context_engineering != NULL) { + if (memcmp(token_start, "Sample", token_length) == 0) { + state->engineering_sample = true; + return false; + } + } + break; + case 7: + /* + * Erase "Geniune" in brand string on Intel engineering samples, e.g. + * "Genuine Intel(R) processor 800MHz" + * "Genuine Intel(R) CPU @ 2.13GHz" + * "Genuine Intel(R) CPU 0000 @ 1.73GHz" + */ + if (erase_matching(token_start, token_length, "Genuine")) { + return true; + } + /* + * Erase "12-core" in brand string on AMD Threadripper, e.g. + * "AMD Ryzen Threadripper 1920X 12-Core Processor" + */ + if (erase_matching(token_start, token_length, "12-Core")) { + return true; + } + /* + * Erase "16-core" in brand string on AMD Threadripper, e.g. 
+ * "AMD Ryzen Threadripper 1950X 16-Core Processor" + */ + if (erase_matching(token_start, token_length, "16-Core")) { + return true; + } + /* Erase "model unknown" in "AMD Processor model unknown" */ + if (previousState.context_model != NULL) { + if (memcmp(token_start, "unknown", token_length) == 0) { + memset(previousState.context_model, ' ', token_end - previousState.context_model); + return true; + } + } + /* + * Discard the string if it contains "Eng Sample:" or "Eng Sample," e.g. + * "AMD Eng Sample, ZD302046W4K43_36/30/20_2/8_A" + * "AMD Eng Sample: 2D3151A2M88E4_35/31_N" + */ + if (previousState.context_engineering != NULL) { + if (memcmp(token_start, "Sample,", token_length) == 0 || memcmp(token_start, "Sample:", token_length) == 0) { + state->engineering_sample = true; + return false; + } + } + break; + case 8: + /* Erase "QuadCore" in "VIA QuadCore L4700 @ 1.2+ GHz" */ + if (erase_matching(token_start, token_length, "QuadCore")) { + state->context_core = token_end; + return true; + } + /* Erase "Six-Core" in "AMD FX(tm)-6100 Six-Core Processor" */ + if (erase_matching(token_start, token_length, "Six-Core")) { + state->context_core = token_end; + return true; + } + break; + case 9: + if (erase_matching(token_start, token_length, "Processor")) { + return true; + } + if (erase_matching(token_start, token_length, "processor")) { + return true; + } + /* Erase "Dual-Core" in "Pentium(R) Dual-Core CPU T4200 @ 2.00GHz" */ + if (erase_matching(token_start, token_length, "Dual-Core")) { + state->context_core = token_end; + return true; + } + /* Erase "Quad-Core" in AMD processors, e.g. + * "Quad-Core AMD Opteron(tm) Processor 2347 HE" + * "AMD FX(tm)-4170 Quad-Core Processor" + */ + if (erase_matching(token_start, token_length, "Quad-Core")) { + state->context_core = token_end; + return true; + } + /* Erase "Transmeta" in brand string on Transmeta processors, e.g. + * "Transmeta(tm) Crusoe(tm) Processor TM5800" + * "Transmeta Efficeon(tm) Processor TM8000" + */ + if (erase_matching(token_start, token_length, "Transmeta")) { + return true; + } + break; + case 10: + /* + * Erase "Eight-Core" in AMD processors, e.g. + * "AMD FX(tm)-8150 Eight-Core Processor" + */ + if (erase_matching(token_start, token_length, "Eight-Core")) { + state->context_core = token_end; + return true; + } + break; + case 11: + /* + * Erase "Triple-Core" in AMD processors, e.g. + * "AMD Phenom(tm) II N830 Triple-Core Processor" + * "AMD Phenom(tm) 8650 Triple-Core Processor" + */ + if (erase_matching(token_start, token_length, "Triple-Core")) { + state->context_core = token_end; + return true; + } + /* + * Remember to discard string if it contains "Engineering Sample", + * e.g. "AMD Engineering Sample" + */ + if (memcmp(token_start, "Engineering", token_length) == 0) { + state->context_engineering = token_start; + return true; + } + break; + } + if (is_zero_number(token_start, token_end)) { + memset(token_start, ' ', token_length); + return true; + } + /* On some Intel processors the last letter of the name is put before the number, + * and an additional space it added, e.g. + * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz" + * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz" + * "Intel(R) Core(TM) i5 CPU M 480 @ 2.67GHz" + * We fix this issue, i.e. 
"X 990" -> "990X", "Q 820" -> "820Q" + */ + if (previousState.context_upper_letter != 0) { + /* A single letter token followed by 2-to-5 digit letter is merged together */ + switch (token_length) { + case 2: + case 3: + case 4: + case 5: + if (is_number(token_start, token_end)) { + /* Load the previous single-letter token */ + const char letter = *previousState.context_upper_letter; + /* Erase the previous single-letter token */ + *previousState.context_upper_letter = ' '; + /* Move the current token one position to the left */ + move_token(token_start, token_end, token_start - 1); + token_start -= 1; + /* + * Add the letter on the end + * Note: accessing token_start[-1] is safe because this is not the first token + */ + token_end[-1] = letter; + } + } + } + if (state->frequency_separator != NULL) { + if (is_model_number(token_start, token_end)) { + state->parsed_model_number = true; + } + } + if (is_frequency(token_start, token_end)) { + state->frequency_token = true; + } + return true; +} + +uint32_t cpuinfo_x86_normalize_brand_string( + const char raw_name[48], + char normalized_name[48]) +{ + normalized_name[0] = '\0'; + char name[48]; + memcpy(name, raw_name, sizeof(name)); + + /* + * First find the end of the string + * Start search from the end because some brand strings contain zeroes in the middle + */ + char* name_end = &name[48]; + while (name_end[-1] == '\0') { + /* + * Adject name_end by 1 position and check that we didn't reach the start of the brand string. + * This is possible if all characters are zero. + */ + if (--name_end == name) { + /* All characters are zeros */ + return 0; + } + } + + struct parser_state parser_state = { 0 }; + + /* Now unify all whitespace characters: replace tabs and '\0' with spaces */ + { + bool inside_parentheses = false; + for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { + switch (*char_ptr) { + case '(': + inside_parentheses = true; + *char_ptr = ' '; + break; + case ')': + inside_parentheses = false; + *char_ptr = ' '; + break; + case '@': + parser_state.frequency_separator = char_ptr; + case '\0': + case '\t': + *char_ptr = ' '; + break; + default: + if (inside_parentheses) { + *char_ptr = ' '; + } + } + } + } + + /* Iterate through all tokens and erase redundant parts */ + { + bool is_token = false; + char* token_start = name; + for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { + if (*char_ptr == ' ') { + if (is_token) { + is_token = false; + if (!transform_token(token_start, char_ptr, &parser_state)) { + name_end = char_ptr; + break; + } + } + } else { + if (!is_token) { + is_token = true; + token_start = char_ptr; + } + } + } + if (is_token) { + transform_token(token_start, name_end, &parser_state); + } + } + + /* If this is an engineering sample, return empty string */ + if (parser_state.engineering_sample) { + return 0; + } + + /* Check if there is some string before the frequency separator. 
*/ + if (parser_state.frequency_separator != NULL) { + if (is_space(name, parser_state.frequency_separator)) { + /* If only frequency is available, return empty string */ + return 0; + } + } + + /* Compact tokens: collapse multiple spacing into one */ + { + char* output_ptr = normalized_name; + char* token_start = name; + bool is_token = false; + bool previous_token_ends_with_dash = true; + bool current_token_starts_with_dash = false; + uint32_t token_count = 1; + for (char* char_ptr = name; char_ptr != name_end; char_ptr++) { + const char character = *char_ptr; + if (character == ' ') { + if (is_token) { + is_token = false; + if (!current_token_starts_with_dash && !previous_token_ends_with_dash) { + token_count += 1; + *output_ptr++ = ' '; + } + output_ptr = move_token(token_start, char_ptr, output_ptr); + /* Note: char_ptr[-1] exists because there is a token before this space */ + previous_token_ends_with_dash = (char_ptr[-1] == '-'); + } + } else { + if (!is_token) { + is_token = true; + token_start = char_ptr; + current_token_starts_with_dash = (character == '-'); + } + } + } + if (is_token) { + if (!current_token_starts_with_dash && !previous_token_ends_with_dash) { + token_count += 1; + *output_ptr++ = ' '; + } + output_ptr = move_token(token_start, name_end, output_ptr); + } + if (parser_state.frequency_token && token_count <= 1) { + /* The only remaining part is frequency */ + normalized_name[0] = '\0'; + return 0; + } + if (output_ptr < &normalized_name[48]) { + *output_ptr = '\0'; + } else { + normalized_name[47] = '\0'; + } + return (uint32_t) (output_ptr - normalized_name); + } +} + +static const char* vendor_string_map[] = { + [cpuinfo_vendor_intel] = "Intel", + [cpuinfo_vendor_amd] = "AMD", + [cpuinfo_vendor_via] = "VIA", + [cpuinfo_vendor_hygon] = "Hygon", + [cpuinfo_vendor_rdc] = "RDC", + [cpuinfo_vendor_dmp] = "DM&P", + [cpuinfo_vendor_transmeta] = "Transmeta", + [cpuinfo_vendor_cyrix] = "Cyrix", + [cpuinfo_vendor_rise] = "Rise", + [cpuinfo_vendor_nsc] = "NSC", + [cpuinfo_vendor_sis] = "SiS", + [cpuinfo_vendor_nexgen] = "NexGen", + [cpuinfo_vendor_umc] = "UMC", +}; + +uint32_t cpuinfo_x86_format_package_name( + enum cpuinfo_vendor vendor, + const char normalized_brand_string[48], + char package_name[CPUINFO_PACKAGE_NAME_MAX]) +{ + if (normalized_brand_string[0] == '\0') { + package_name[0] = '\0'; + return 0; + } + + const char* vendor_string = NULL; + if ((uint32_t) vendor < (uint32_t) CPUINFO_COUNT_OF(vendor_string_map)) { + vendor_string = vendor_string_map[(uint32_t) vendor]; + } + if (vendor_string == NULL) { + strncpy(package_name, normalized_brand_string, CPUINFO_PACKAGE_NAME_MAX); + package_name[CPUINFO_PACKAGE_NAME_MAX - 1] = '\0'; + return 0; + } else { + snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, + "%s %s", vendor_string, normalized_brand_string); + return (uint32_t) strlen(vendor_string) + 1; + } +} diff --git a/dep/cpuinfo/src/x86/topology.c b/dep/cpuinfo/src/x86/topology.c new file mode 100644 index 000000000..0e83d4683 --- /dev/null +++ b/dep/cpuinfo/src/x86/topology.c @@ -0,0 +1,127 @@ +#include +#include + +#include +#include +#include +#include +#include + + +enum topology_type { + topology_type_invalid = 0, + topology_type_smt = 1, + topology_type_core = 2, +}; + +void cpuinfo_x86_detect_topology( + uint32_t max_base_index, + uint32_t max_extended_index, + struct cpuid_regs leaf1, + struct cpuinfo_x86_topology* topology) +{ + /* + * HTT: indicates multi-core/hyper-threading support on this core. + * - Intel, AMD: edx[bit 28] in basic info. 
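As an aside (illustrative only, not part of the patch): a minimal sketch of how the brand-string helpers from src/x86/name.c above might be exercised. The prototypes are restated locally for the sketch; in the library they come from an internal x86 API header, and both the CPUINFO_PACKAGE_NAME_MAX value and the cpuinfo_vendor_intel enumerator value below are assumptions made only for this illustration (a real build would include the library's own headers instead).

/* Sketch only: compile and link together with src/x86/name.c; not library code. */
#include <stdint.h>
#include <stdio.h>

#define CPUINFO_PACKAGE_NAME_MAX 48                /* assumed size, sketch only */
enum cpuinfo_vendor { cpuinfo_vendor_intel = 1 };  /* assumed value, sketch only */

uint32_t cpuinfo_x86_normalize_brand_string(const char raw_name[48], char normalized_name[48]);
uint32_t cpuinfo_x86_format_package_name(enum cpuinfo_vendor vendor,
	const char normalized_brand_string[48], char package_name[CPUINFO_PACKAGE_NAME_MAX]);

int main(void) {
	/* CPUID brand strings are always 48 bytes, zero padded. */
	char raw[48] = "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz";
	char normalized[48];
	char package[CPUINFO_PACKAGE_NAME_MAX];
	cpuinfo_x86_normalize_brand_string(raw, normalized);
	/* Per the transformations above, "Intel(R)", "(TM)", "CPU" and the
	 * frequency part are stripped, and "X 990" is merged into "990X". */
	cpuinfo_x86_format_package_name(cpuinfo_vendor_intel, normalized, package);
	printf("%s\n", package); /* expected along the lines of "Intel Core i7 990X" */
	return 0;
}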
+ */ + const bool htt = !!(leaf1.edx & UINT32_C(0x10000000)); + + uint32_t apic_id = 0; + if (htt) { + apic_id = leaf1.ebx >> 24; + bool amd_cmp_legacy = false; + if (max_extended_index >= UINT32_C(0x80000001)) { + const struct cpuid_regs leaf0x80000001 = cpuid(UINT32_C(0x80000001)); + /* + * CmpLegacy: core multi-processing legacy mode. + * - AMD: ecx[bit 1] in extended info (reserved bit on Intel CPUs). + */ + amd_cmp_legacy = !!(leaf0x80000001.ecx & UINT32_C(0x00000002)); + } + if (amd_cmp_legacy) { + if (max_extended_index >= UINT32_C(0x80000008)) { + const struct cpuid_regs leaf0x80000008 = cpuid(UINT32_C(0x80000008)); + /* + * NC: number of physical cores - 1. The number of cores in the processor is NC+1. + * - AMD: ecx[bits 0-7] in leaf 0x80000008 (reserved zero bits on Intel CPUs). + */ + const uint32_t cores_per_processor = 1 + (leaf0x80000008.ecx & UINT32_C(0x000000FF)); + topology->core_bits_length = bit_length(cores_per_processor); + cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", cores per processor = %"PRIu32, apic_id, cores_per_processor); + } else { + /* + * LogicalProcessorCount: the number of cores per processor. + * - AMD: ebx[bits 16-23] in basic info (different interpretation on Intel CPUs). + */ + const uint32_t cores_per_processor = (leaf1.ebx >> 16) & UINT32_C(0x000000FF); + if (cores_per_processor != 0) { + topology->core_bits_length = bit_length(cores_per_processor); + } + cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", cores per processor = %"PRIu32, apic_id, cores_per_processor); + } + } else { + /* + * Maximum number of addressable IDs for logical processors in this physical package. + * - Intel: ebx[bits 16-23] in basic info (different interpretation on AMD CPUs). + */ + const uint32_t logical_processors = (leaf1.ebx >> 16) & UINT32_C(0x000000FF); + if (logical_processors != 0) { + const uint32_t log2_max_logical_processors = bit_length(logical_processors); + const uint32_t log2_max_threads_per_core = log2_max_logical_processors - topology->core_bits_length; + topology->core_bits_offset = log2_max_threads_per_core; + topology->thread_bits_length = log2_max_threads_per_core; + } + cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", logical processors = %"PRIu32, apic_id, logical_processors); + } + } + + /* + * x2APIC: indicated support for x2APIC feature. + * - Intel: ecx[bit 21] in basic info (reserved bit on AMD CPUs). 
+ */ + const bool x2apic = !!(leaf1.ecx & UINT32_C(0x00200000)); + if (x2apic && (max_base_index >= UINT32_C(0xB))) { + uint32_t level = 0; + uint32_t type; + uint32_t total_shift = 0; + topology->thread_bits_offset = topology->thread_bits_length = 0; + topology->core_bits_offset = topology->core_bits_length = 0; + do { + const struct cpuid_regs leafB = cpuidex(UINT32_C(0xB), level); + type = (leafB.ecx >> 8) & UINT32_C(0x000000FF); + const uint32_t level_shift = leafB.eax & UINT32_C(0x0000001F); + const uint32_t x2apic_id = leafB.edx; + apic_id = x2apic_id; + switch (type) { + case topology_type_invalid: + break; + case topology_type_smt: + cpuinfo_log_debug("x2 level %"PRIu32": APIC ID = %08"PRIx32", " + "type SMT, shift %"PRIu32", total shift %"PRIu32, + level, apic_id, level_shift, total_shift); + topology->thread_bits_offset = total_shift; + topology->thread_bits_length = level_shift; + break; + case topology_type_core: + cpuinfo_log_debug("x2 level %"PRIu32": APIC ID = %08"PRIx32", " + "type core, shift %"PRIu32", total shift %"PRIu32, + level, apic_id, level_shift, total_shift); + topology->core_bits_offset = total_shift; + topology->core_bits_length = level_shift; + break; + default: + cpuinfo_log_warning("unexpected topology type %"PRIu32" (offset %"PRIu32", length %"PRIu32") " + "reported in leaf 0x0000000B is ignored", type, total_shift, level_shift); + break; + } + total_shift += level_shift; + level += 1; + } while (type != 0); + cpuinfo_log_debug("x2APIC ID 0x%08"PRIx32", " + "SMT offset %"PRIu32" length %"PRIu32", core offset %"PRIu32" length %"PRIu32, apic_id, + topology->thread_bits_offset, topology->thread_bits_length, + topology->core_bits_offset, topology->core_bits_length); + } + + topology->apic_id = apic_id; +} diff --git a/dep/cpuinfo/src/x86/uarch.c b/dep/cpuinfo/src/x86/uarch.c new file mode 100644 index 000000000..370549942 --- /dev/null +++ b/dep/cpuinfo/src/x86/uarch.c @@ -0,0 +1,241 @@ +#include + +#include +#include + + +enum cpuinfo_uarch cpuinfo_x86_decode_uarch( + enum cpuinfo_vendor vendor, + const struct cpuinfo_x86_model_info* model_info) +{ + switch (vendor) { + case cpuinfo_vendor_intel: + switch (model_info->family) { +#if CPUINFO_ARCH_X86 + case 0x05: + switch (model_info->model) { + case 0x01: // Pentium (60, 66) + case 0x02: // Pentium (75, 90, 100, 120, 133, 150, 166, 200) + case 0x03: // Pentium OverDrive for Intel486-based systems + case 0x04: // Pentium MMX + return cpuinfo_uarch_p5; + case 0x09: + return cpuinfo_uarch_quark; + } + break; +#endif /* CPUINFO_ARCH_X86 */ + case 0x06: + switch (model_info->model) { + /* Mainstream cores */ +#if CPUINFO_ARCH_X86 + case 0x01: // Pentium Pro + case 0x03: // Pentium II (Klamath) and Pentium II Overdrive + case 0x05: // Pentium II (Deschutes, Tonga), Pentium II Celeron (Covington), Pentium II Xeon (Drake) + case 0x06: // Pentium II (Dixon), Pentium II Celeron (Mendocino) + case 0x07: // Pentium III (Katmai), Pentium III Xeon (Tanner) + case 0x08: // Pentium III (Coppermine), Pentium II Celeron (Coppermine-128), Pentium III Xeon (Cascades) + case 0x0A: // Pentium III Xeon (Cascades-2MB) + case 0x0B: // Pentium III (Tualatin), Pentium III Celeron (Tualatin-256) + return cpuinfo_uarch_p6; + case 0x09: // Pentium M (Banias), Pentium M Celeron (Banias-0, Banias-512) + case 0x0D: // Pentium M (Dothan), Pentium M Celeron (Dothan-512, Dothan-1024) + case 0x15: // Intel 80579 (Tolapai) + return cpuinfo_uarch_dothan; + case 0x0E: // Core Solo/Duo (Yonah), Pentium Dual-Core T2xxx (Yonah), Celeron M (Yonah-512, 
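As an aside (illustrative only, not part of the patch): once cpuinfo_x86_detect_topology() above has filled in the thread/core bit offsets and lengths, an APIC ID decomposes into SMT, core and package fields. The helper below is a sketch of that decomposition under those assumptions, mirroring how the Windows initialization code later computes the package bits; it is not library code.

/* Sketch only: splitting an APIC ID given the bit fields detected above. */
#include <stdint.h>

static inline uint32_t field_mask(uint32_t bits) {
	return (UINT32_C(1) << bits) - UINT32_C(1);
}

struct apic_fields {
	uint32_t smt_id;
	uint32_t core_id;
	uint32_t package_id;
};

static struct apic_fields split_apic_id(uint32_t apic_id,
	uint32_t thread_bits_offset, uint32_t thread_bits_length,
	uint32_t core_bits_offset, uint32_t core_bits_length)
{
	struct apic_fields f;
	f.smt_id  = (apic_id >> thread_bits_offset) & field_mask(thread_bits_length);
	f.core_id = (apic_id >> core_bits_offset) & field_mask(core_bits_length);
	/* Everything above the thread and core fields identifies the package. */
	const uint32_t package_shift =
		(thread_bits_offset + thread_bits_length > core_bits_offset + core_bits_length)
			? thread_bits_offset + thread_bits_length
			: core_bits_offset + core_bits_length;
	f.package_id = apic_id >> package_shift;
	return f;
}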
Yonah-1024), Dual-Core Xeon (Sossaman) + return cpuinfo_uarch_yonah; +#endif /* CPUINFO_ARCH_X86 */ + case 0x0F: // Core 2 Duo (Conroe, Conroe-2M, Merom), Core 2 Quad (Tigerton), Xeon (Woodcrest, Clovertown, Kentsfield) + case 0x16: // Celeron (Conroe-L, Merom-L), Core 2 Duo (Merom) + return cpuinfo_uarch_conroe; + case 0x17: // Core 2 Duo (Penryn-3M), Core 2 Quad (Yorkfield), Core 2 Extreme (Yorkfield), Xeon (Harpertown), Pentium Dual-Core (Penryn) + case 0x1D: // Xeon (Dunnington) + return cpuinfo_uarch_penryn; + case 0x1A: // Core iX (Bloomfield), Xeon (Gainestown) + case 0x1E: // Core iX (Lynnfield, Clarksfield) + case 0x1F: // Core iX (Havendale) + case 0x2E: // Xeon (Beckton) + case 0x25: // Core iX (Clarkdale) + case 0x2C: // Core iX (Gulftown), Xeon (Gulftown) + case 0x2F: // Xeon (Eagleton) + return cpuinfo_uarch_nehalem; + case 0x2A: // Core iX (Sandy Bridge) + case 0x2D: // Core iX (Sandy Bridge-E), Xeon (Sandy Bridge EP/EX) + return cpuinfo_uarch_sandy_bridge; + case 0x3A: // Core iX (Ivy Bridge) + case 0x3E: // Ivy Bridge-E + return cpuinfo_uarch_ivy_bridge; + case 0x3C: + case 0x3F: // Haswell-E + case 0x45: // Haswell ULT + case 0x46: // Haswell with eDRAM + return cpuinfo_uarch_haswell; + case 0x3D: // Broadwell-U + case 0x47: // Broadwell-H + case 0x4F: // Broadwell-E + case 0x56: // Broadwell-DE + return cpuinfo_uarch_broadwell; + case 0x4E: // Sky Lake Client Y/U + case 0x55: // Sky/Cascade/Cooper Lake Server + case 0x5E: // Sky Lake Client DT/H/S + case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U + case 0x9E: // Kaby/Coffee Lake DT/H/S + case 0xA5: // Comet Lake H/S + case 0xA6: // Comet Lake U/Y + return cpuinfo_uarch_sky_lake; + case 0x66: // Cannon Lake (Core i3-8121U) + return cpuinfo_uarch_palm_cove; + case 0x6A: // Ice Lake-DE + case 0x6C: // Ice Lake-SP + case 0x7D: // Ice Lake-Y + case 0x7E: // Ice Lake-U + return cpuinfo_uarch_sunny_cove; + + /* Low-power cores */ + case 0x1C: // Diamondville, Silverthorne, Pineview + case 0x26: // Tunnel Creek + return cpuinfo_uarch_bonnell; + case 0x27: // Medfield + case 0x35: // Cloverview + case 0x36: // Cedarview, Centerton + return cpuinfo_uarch_saltwell; + case 0x37: // Bay Trail + case 0x4A: // Merrifield + case 0x4D: // Avoton, Rangeley + case 0x5A: // Moorefield + case 0x5D: // SoFIA + return cpuinfo_uarch_silvermont; + case 0x4C: // Braswell, Cherry Trail + case 0x75: // Spreadtrum SC9853I-IA + return cpuinfo_uarch_airmont; + case 0x5C: // Apollo Lake + case 0x5F: // Denverton + return cpuinfo_uarch_goldmont; + case 0x7A: // Gemini Lake + return cpuinfo_uarch_goldmont_plus; + + /* Knights-series cores */ + case 0x57: + return cpuinfo_uarch_knights_landing; + case 0x85: + return cpuinfo_uarch_knights_mill; + } + break; + case 0x0F: + switch (model_info->model) { + case 0x00: // Pentium 4 Xeon (Foster) + case 0x01: // Pentium 4 Celeron (Willamette-128), Pentium 4 Xeon (Foster, Foster MP) + case 0x02: // Pentium 4 (Northwood), Pentium 4 EE (Gallatin), Pentium 4 Celeron (Northwood-128, Northwood-256), Pentium 4 Xeon (Gallatin DP, Prestonia) + return cpuinfo_uarch_willamette; + break; + case 0x03: // Pentium 4 (Prescott), Pentium 4 Xeon (Nocona) + case 0x04: // Pentium 4 (Prescott-2M), Pentium 4 EE (Prescott-2M), Pentium D (Smithfield), Celeron D (Prescott-256), Pentium 4 Xeon (Cranford, Irwindale, Paxville) + case 0x06: // Pentium 4 (Cedar Mill), Pentium D EE (Presler), Celeron D (Cedar Mill), Pentium 4 Xeon (Dempsey, Tulsa) + return cpuinfo_uarch_prescott; + } + break; + } + break; + case cpuinfo_vendor_amd: + switch 
(model_info->family) { +#if CPUINFO_ARCH_X86 + case 0x5: + switch (model_info->model) { + case 0x00: + case 0x01: + case 0x02: + return cpuinfo_uarch_k5; + case 0x06: + case 0x07: + case 0x08: + case 0x0D: + return cpuinfo_uarch_k6; + case 0x0A: + return cpuinfo_uarch_geode; + } + break; + case 0x6: + return cpuinfo_uarch_k7; +#endif /* CPUINFO_ARCH_X86 */ + case 0xF: // Opteron, Athlon 64, Sempron + case 0x11: // Turion + return cpuinfo_uarch_k8; + case 0x10: // Opteron, Phenom, Athlon, Sempron + case 0x12: // Llano APU + return cpuinfo_uarch_k10; + case 0x14: + return cpuinfo_uarch_bobcat; + case 0x15: + switch (model_info->model) { + case 0x00: // Engineering samples + case 0x01: // Zambezi, Interlagos + return cpuinfo_uarch_bulldozer; + case 0x02: // Vishera + case 0x10: // Trinity + case 0x13: // Richland + return cpuinfo_uarch_piledriver; + case 0x38: // Godavari + case 0x30: // Kaveri + return cpuinfo_uarch_steamroller; + case 0x60: // Carrizo + case 0x65: // Bristol Ridge + case 0x70: // Stoney Ridge + return cpuinfo_uarch_excavator; + default: + switch (model_info->extended_model) { + case 0x0: + return cpuinfo_uarch_bulldozer; + case 0x1: // No L3 cache + case 0x2: // With L3 cache + return cpuinfo_uarch_piledriver; + case 0x3: // With L3 cache + case 0x4: // No L3 cache + return cpuinfo_uarch_steamroller; + } + break; + } + break; + case 0x16: + if (model_info->model >= 0x03) { + return cpuinfo_uarch_puma; + } else { + return cpuinfo_uarch_jaguar; + } + case 0x17: + switch (model_info->model) { + case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl + case 0x08: // 12 nm Pinnacle Ridge + case 0x11: // 14 nm Raven Ridge, Great Horned Owl + case 0x18: // 12 nm Picasso + return cpuinfo_uarch_zen; + case 0x31: // Rome, Castle Peak + case 0x60: // Renoir + case 0x68: // Lucienne + case 0x71: // Matisse + case 0x90: // Van Gogh + case 0x98: // Mero + return cpuinfo_uarch_zen2; + } + break; + case 0x19: + switch (model_info->model) { + case 0x01: // Genesis + case 0x21: // Vermeer + case 0x30: // Badami, Trento + case 0x40: // Rembrandt + case 0x50: // Cezanne + return cpuinfo_uarch_zen3; + } + break; + } + break; + case cpuinfo_vendor_hygon: + switch (model_info->family) { + case 0x00: + return cpuinfo_uarch_dhyana; + } + break; + default: + break; + } + return cpuinfo_uarch_unknown; +} diff --git a/dep/cpuinfo/src/x86/vendor.c b/dep/cpuinfo/src/x86/vendor.c new file mode 100644 index 000000000..bad50fa95 --- /dev/null +++ b/dep/cpuinfo/src/x86/vendor.c @@ -0,0 +1,189 @@ +#include + +#include +#include + + +/* Intel vendor string: "GenuineIntel" */ +#define Genu UINT32_C(0x756E6547) +#define ineI UINT32_C(0x49656E69) +#define ntel UINT32_C(0x6C65746E) + +/* AMD vendor strings: "AuthenticAMD", "AMDisbetter!", "AMD ISBETTER" */ +#define Auth UINT32_C(0x68747541) +#define enti UINT32_C(0x69746E65) +#define cAMD UINT32_C(0x444D4163) +#define AMDi UINT32_C(0x69444D41) +#define sbet UINT32_C(0x74656273) +#define ter UINT32_C(0x21726574) +#define AMD UINT32_C(0x20444D41) +#define ISBE UINT32_C(0x45425349) +#define TTER UINT32_C(0x52455454) + +/* VIA (Centaur) vendor strings: "CentaurHauls", "VIA VIA VIA " */ +#define Cent UINT32_C(0x746E6543) +#define aurH UINT32_C(0x48727561) +#define auls UINT32_C(0x736C7561) +#define VIA UINT32_C(0x20414956) + +/* Hygon vendor string: "HygonGenuine" */ +#define Hygo UINT32_C(0x6F677948) +#define nGen UINT32_C(0x6E65476E) +#define uine UINT32_C(0x656E6975) + +/* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */ +#define ineT 
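As an aside (illustrative only, not part of the patch): the family/model/extended_model fields consumed by cpuinfo_x86_decode_uarch() above are derived from CPUID leaf 1 EAX. The split below follows the standard x86 signature encoding and is a sketch for orientation only, not the library's own implementation.

/* Sketch only: standard x86 CPUID signature decoding. */
#include <stdint.h>

struct model_fields {
	uint32_t family;          /* base family, EAX bits 8-11  */
	uint32_t model;           /* base model,  EAX bits 4-7   */
	uint32_t extended_family; /* EAX bits 20-27 */
	uint32_t extended_model;  /* EAX bits 16-19 */
	uint32_t display_family;  /* combined value, as matched by the switch above */
	uint32_t display_model;
};

static struct model_fields split_signature(uint32_t eax) {
	struct model_fields m;
	m.family = (eax >> 8) & UINT32_C(0xF);
	m.model = (eax >> 4) & UINT32_C(0xF);
	m.extended_family = (eax >> 20) & UINT32_C(0xFF);
	m.extended_model = (eax >> 16) & UINT32_C(0xF);
	/* Extended family applies only to family 0xF; extended model to families 0x6 and 0xF. */
	m.display_family = (m.family == 0xF) ? m.family + m.extended_family : m.family;
	m.display_model = (m.family == 0x6 || m.family == 0xF)
		? (m.extended_model << 4) + m.model
		: m.model;
	return m;
}

/* Example: EAX = 0x00870F10 gives display family 0x17 and display model 0x71,
 * which the switch above maps to cpuinfo_uarch_zen2 (Matisse). */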
UINT32_C(0x54656E69) +#define Mx86 UINT32_C(0x3638784D) +#define Tran UINT32_C(0x6E617254) +#define smet UINT32_C(0x74656D73) +#define aCPU UINT32_C(0x55504361) + +/* Cyrix vendor string: "CyrixInstead" */ +#define Cyri UINT32_C(0x69727943) +#define xIns UINT32_C(0x736E4978) +#define tead UINT32_C(0x64616574) + +/* Rise vendor string: "RiseRiseRise" */ +#define Rise UINT32_C(0x65736952) + +/* NSC vendor string: "Geode by NSC" */ +#define Geod UINT32_C(0x646F6547) +#define e_by UINT32_C(0x79622065) +#define NSC UINT32_C(0x43534E20) + +/* SiS vendor string: "SiS SiS SiS " */ +#define SiS UINT32_C(0x20536953) + +/* NexGen vendor string: "NexGenDriven" */ +#define NexG UINT32_C(0x4778654E) +#define enDr UINT32_C(0x72446E65) +#define iven UINT32_C(0x6E657669) + +/* UMC vendor string: "UMC UMC UMC " */ +#define UMC UINT32_C(0x20434D55) + +/* RDC vendor string: "Genuine RDC" */ +#define ine UINT32_C(0x20656E69) +#define RDC UINT32_C(0x43445220) + +/* D&MP vendor string: "Vortex86 SoC" */ +#define Vort UINT32_C(0x74726F56) +#define ex86 UINT32_C(0x36387865) +#define SoC UINT32_C(0x436F5320) + + +enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx) { + switch (ebx) { + case Genu: + switch (edx) { + case ineI: + if (ecx == ntel) { + /* "GenuineIntel" */ + return cpuinfo_vendor_intel; + } + break; +#if CPUINFO_ARCH_X86 + case ineT: + if (ecx == Mx86) { + /* "GenuineTMx86" */ + return cpuinfo_vendor_transmeta; + } + break; + case ine: + if (ecx == RDC) { + /* "Genuine RDC" */ + return cpuinfo_vendor_rdc; + } + break; +#endif + } + break; + case Auth: + if (edx == enti && ecx == cAMD) { + /* "AuthenticAMD" */ + return cpuinfo_vendor_amd; + } + break; + case Cent: + if (edx == aurH && ecx == auls) { + /* "CentaurHauls" */ + return cpuinfo_vendor_via; + } + break; + case Hygo: + if (edx == nGen && ecx == uine) { + /* "HygonGenuine" */ + return cpuinfo_vendor_hygon; + } + break; +#if CPUINFO_ARCH_X86 + case AMDi: + if (edx == sbet && ecx == ter) { + /* "AMDisbetter!" 
*/ + return cpuinfo_vendor_amd; + } + break; + case AMD: + if (edx == ISBE && ecx == TTER) { + /* "AMD ISBETTER" */ + return cpuinfo_vendor_amd; + } + break; + case VIA: + if (edx == VIA && ecx == VIA) { + /* "VIA VIA VIA " */ + return cpuinfo_vendor_via; + } + break; + case Tran: + if (edx == smet && ecx == aCPU) { + /* "TransmetaCPU" */ + return cpuinfo_vendor_transmeta; + } + break; + case Cyri: + if (edx == xIns && ecx == tead) { + /* "CyrixInstead" */ + return cpuinfo_vendor_cyrix; + } + break; + case Rise: + if (edx == Rise && ecx == Rise) { + /* "RiseRiseRise" */ + return cpuinfo_vendor_rise; + } + break; + case Geod: + if (edx == e_by && ecx == NSC) { + /* "Geode by NSC" */ + return cpuinfo_vendor_nsc; + } + break; + case SiS: + if (edx == SiS && ecx == SiS) { + /* "SiS SiS SiS " */ + return cpuinfo_vendor_sis; + } + break; + case NexG: + if (edx == enDr && ecx == iven) { + /* "NexGenDriven" */ + return cpuinfo_vendor_nexgen; + } + break; + case UMC: + if (edx == UMC && ecx == UMC) { + /* "UMC UMC UMC " */ + return cpuinfo_vendor_umc; + } + break; + case Vort: + if (edx == ex86 && ecx == SoC) { + /* "Vortex86 SoC" */ + return cpuinfo_vendor_dmp; + } + break; +#endif + } + return cpuinfo_vendor_unknown; +} diff --git a/dep/cpuinfo/src/x86/windows/api.h b/dep/cpuinfo/src/x86/windows/api.h new file mode 100644 index 000000000..33d917e0d --- /dev/null +++ b/dep/cpuinfo/src/x86/windows/api.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include + +#include +#include + +struct cpuinfo_arm_linux_processor { + /** + * Minimum processor ID on the package which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + uint32_t package_leader_id; + /** + * Minimum processor ID on the core which includes this logical processor. + * This value can serve as an ID for the cluster of logical processors: it is the + * same for all logical processors on the same package. + */ + /** + * Number of logical processors in the package. + */ + uint32_t package_processor_count; + /** + * Maximum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu/cpufreq/cpuinfo_max_freq + * If failed to read or parse the file, the value is 0. + */ + uint32_t max_frequency; + /** + * Minimum frequency, in kHZ. + * The value is parsed from /sys/devices/system/cpu/cpu/cpufreq/cpuinfo_min_freq + * If failed to read or parse the file, the value is 0. 
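As an aside (illustrative only, not part of the patch): CPUID leaf 0 returns the 12-character vendor string packed little-endian into EBX, EDX and ECX (in that order), which is why the constants in src/x86/vendor.c above compare whole registers rather than strings. A small sketch of the packing:

/* Sketch only: how four ASCII characters map onto one vendor-string register. */
#include <stdint.h>

static uint32_t pack4(const char* s) {
	/* Little-endian: the first character ends up in the low byte. */
	return (uint32_t) (uint8_t) s[0]
		| ((uint32_t) (uint8_t) s[1] << 8)
		| ((uint32_t) (uint8_t) s[2] << 16)
		| ((uint32_t) (uint8_t) s[3] << 24);
}

/* pack4("Genu") == 0x756E6547, pack4("ineI") == 0x49656E69, pack4("ntel") == 0x6C65746E,
 * matching the Genu/ineI/ntel constants used by cpuinfo_x86_decode_vendor() above. */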
+ */ + uint32_t min_frequency; + /** Linux processor ID */ + uint32_t system_processor_id; + uint32_t flags; +}; diff --git a/dep/cpuinfo/src/x86/windows/init.c b/dep/cpuinfo/src/x86/windows/init.c new file mode 100644 index 000000000..274075c02 --- /dev/null +++ b/dep/cpuinfo/src/x86/windows/init.c @@ -0,0 +1,634 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef __GNUC__ + #define CPUINFO_ALLOCA __builtin_alloca +#else + #define CPUINFO_ALLOCA _alloca +#endif + + +static inline uint32_t bit_mask(uint32_t bits) { + return (UINT32_C(1) << bits) - UINT32_C(1); +} + +static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) { + #if defined(_M_X64) || defined(_M_AMD64) + unsigned long index; + _BitScanForward64(&index, (unsigned __int64) kaffinity); + return (uint32_t) index; + #elif defined(_M_IX86) + unsigned long index; + _BitScanForward(&index, (unsigned long) kaffinity); + return (uint32_t) index; + #else + #error Platform-specific implementation required + #endif +} + +static void cpuinfo_x86_count_caches( + uint32_t processors_count, + const struct cpuinfo_processor* processors, + const struct cpuinfo_x86_processor* x86_processor, + uint32_t* l1i_count_ptr, + uint32_t* l1d_count_ptr, + uint32_t* l2_count_ptr, + uint32_t* l3_count_ptr, + uint32_t* l4_count_ptr) +{ + uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0; + uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX; + uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX; + for (uint32_t i = 0; i < processors_count; i++) { + const uint32_t apic_id = processors[i].apic_id; + cpuinfo_log_debug("APID ID %"PRIu32": logical processor %"PRIu32, apic_id, i); + + if (x86_processor->cache.l1i.size != 0) { + const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor->cache.l1i.apic_bits); + if (l1i_id != last_l1i_id) { + last_l1i_id = l1i_id; + l1i_count++; + } + } + if (x86_processor->cache.l1d.size != 0) { + const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor->cache.l1d.apic_bits); + if (l1d_id != last_l1d_id) { + last_l1d_id = l1d_id; + l1d_count++; + } + } + if (x86_processor->cache.l2.size != 0) { + const uint32_t l2_id = apic_id & ~bit_mask(x86_processor->cache.l2.apic_bits); + if (l2_id != last_l2_id) { + last_l2_id = l2_id; + l2_count++; + } + } + if (x86_processor->cache.l3.size != 0) { + const uint32_t l3_id = apic_id & ~bit_mask(x86_processor->cache.l3.apic_bits); + if (l3_id != last_l3_id) { + last_l3_id = l3_id; + l3_count++; + } + } + if (x86_processor->cache.l4.size != 0) { + const uint32_t l4_id = apic_id & ~bit_mask(x86_processor->cache.l4.apic_bits); + if (l4_id != last_l4_id) { + last_l4_id = l4_id; + l4_count++; + } + } + } + *l1i_count_ptr = l1i_count; + *l1d_count_ptr = l1d_count; + *l2_count_ptr = l2_count; + *l3_count_ptr = l3_count; + *l4_count_ptr = l4_count; +} + +static bool cpuinfo_x86_windows_is_wine(void) { + HMODULE ntdll = GetModuleHandleW(L"ntdll.dll"); + if (ntdll == NULL) { + return false; + } + + return GetProcAddress(ntdll, "wine_get_version") != NULL; +} + +BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { + struct cpuinfo_processor* processors = NULL; + struct cpuinfo_core* cores = NULL; + struct cpuinfo_cluster* clusters = NULL; + struct cpuinfo_package* packages = NULL; + struct cpuinfo_cache* l1i = NULL; + struct cpuinfo_cache* l1d = NULL; + struct cpuinfo_cache* l2 = NULL; + struct cpuinfo_cache* l3 = NULL; + struct 
cpuinfo_cache* l4 = NULL; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX processor_infos = NULL; + + HANDLE heap = GetProcessHeap(); + const bool is_wine = cpuinfo_x86_windows_is_wine(); + + struct cpuinfo_x86_processor x86_processor; + ZeroMemory(&x86_processor, sizeof(x86_processor)); + cpuinfo_x86_init_processor(&x86_processor); + char brand_string[48]; + cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string); + + const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length); + const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length); + const uint32_t package_bits_offset = max( + x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length, + x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length); + + /* WINE doesn't implement GetMaximumProcessorGroupCount and aborts when calling it */ + const uint32_t max_group_count = is_wine ? 1 : (uint32_t) GetMaximumProcessorGroupCount(); + cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count); + + uint32_t processors_count = 0; + uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t)); + for (uint32_t i = 0; i < max_group_count; i++) { + processors_per_group[i] = GetMaximumProcessorCount((WORD) i); + cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32, + processors_per_group[i], i); + processors_count += processors_per_group[i]; + } + + uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t)); + for (uint32_t i = 0, count = 0; i < max_group_count; i++) { + processors_before_group[i] = count; + cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32, + processors_before_group[i], i); + count += processors_per_group[i]; + } + + processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, processors_count * sizeof(struct cpuinfo_processor)); + if (processors == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", + processors_count * sizeof(struct cpuinfo_processor), processors_count); + goto cleanup; + } + + DWORD cores_info_size = 0; + if (GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &cores_info_size) == FALSE) { + const DWORD last_error = GetLastError(); + if (last_error != ERROR_INSUFFICIENT_BUFFER) { + cpuinfo_log_error("failed to query size of processor cores information: error %"PRIu32, + (uint32_t) last_error); + goto cleanup; + } + } + + DWORD packages_info_size = 0; + if (GetLogicalProcessorInformationEx(RelationProcessorPackage, NULL, &packages_info_size) == FALSE) { + const DWORD last_error = GetLastError(); + if (last_error != ERROR_INSUFFICIENT_BUFFER) { + cpuinfo_log_error("failed to query size of processor packages information: error %"PRIu32, + (uint32_t) last_error); + goto cleanup; + } + } + + DWORD max_info_size = max(cores_info_size, packages_info_size); + + processor_infos = HeapAlloc(heap, 0, max_info_size); + if (processor_infos == NULL) { + cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information", + (uint32_t) max_info_size); + goto cleanup; + } + + if (GetLogicalProcessorInformationEx(RelationProcessorPackage, processor_infos, &max_info_size) == FALSE) { + cpuinfo_log_error("failed to query processor packages information: error %"PRIu32, + (uint32_t) GetLastError()); + goto cleanup; + } + + uint32_t packages_count = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX packages_info_end = + 
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + packages_info_size); + for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX package_info = processor_infos; + package_info < packages_info_end; + package_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) package_info + package_info->Size)) + { + if (package_info->Relationship != RelationProcessorPackage) { + cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor package information", + (uint32_t) package_info->Relationship); + continue; + } + + /* We assume that packages are reported in APIC order */ + const uint32_t package_id = packages_count++; + /* Reconstruct package part of APIC ID */ + const uint32_t package_apic_id = package_id << package_bits_offset; + /* Iterate processor groups and set the package part of APIC ID */ + for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) { + const uint32_t group_id = package_info->Processor.GroupMask[i].Group; + /* Global index of the first logical processor belonging to this group */ + const uint32_t group_processors_start = processors_before_group[group_id]; + /* Bitmask representing processors in this group belonging to this package */ + KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask; + while (group_processors_mask != 0) { + const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask); + const uint32_t processor_id = group_processors_start + group_processor_id; + processors[processor_id].package = (const struct cpuinfo_package*) NULL + package_id; + processors[processor_id].windows_group_id = (uint16_t) group_id; + processors[processor_id].windows_processor_id = (uint16_t) group_processor_id; + processors[processor_id].apic_id = package_apic_id; + + /* Reset the lowest bit in affinity mask */ + group_processors_mask &= (group_processors_mask - 1); + } + } + } + + max_info_size = max(cores_info_size, packages_info_size); + if (GetLogicalProcessorInformationEx(RelationProcessorCore, processor_infos, &max_info_size) == FALSE) { + cpuinfo_log_error("failed to query processor cores information: error %"PRIu32, + (uint32_t) GetLastError()); + goto cleanup; + } + + uint32_t cores_count = 0; + /* Index (among all cores) of the the first core on the current package */ + uint32_t package_core_start = 0; + uint32_t current_package_apic_id = 0; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX cores_info_end = + (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + cores_info_size); + for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info = processor_infos; + core_info < cores_info_end; + core_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) core_info + core_info->Size)) + { + if (core_info->Relationship != RelationProcessorCore) { + cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor core information", + (uint32_t) core_info->Relationship); + continue; + } + + /* We assume that cores and logical processors are reported in APIC order */ + const uint32_t core_id = cores_count++; + uint32_t smt_id = 0; + /* Reconstruct core part of APIC ID */ + const uint32_t core_apic_id = (core_id & core_bits_mask) << x86_processor.topology.core_bits_offset; + /* Iterate processor groups and set the core & SMT parts of APIC ID */ + for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) { + const uint32_t group_id = core_info->Processor.GroupMask[i].Group; + /* Global index of the first logical processor belonging to this group */ + const uint32_t 
group_processors_start = processors_before_group[group_id]; + /* Bitmask representing processors in this group belonging to this package */ + KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask; + while (group_processors_mask != 0) { + const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask); + const uint32_t processor_id = group_processors_start + group_processor_id; + + /* Check if this is the first core on a new package */ + if (processors[processor_id].apic_id != current_package_apic_id) { + package_core_start = core_id; + current_package_apic_id = processors[processor_id].apic_id; + } + /* Core ID w.r.t package */ + const uint32_t package_core_id = core_id - package_core_start; + + /* Update APIC ID with core and SMT parts */ + processors[processor_id].apic_id |= + ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) | + ((package_core_id & core_bits_mask) << x86_processor.topology.core_bits_offset); + cpuinfo_log_debug("reconstructed APIC ID 0x%08"PRIx32" for processor %"PRIu32" in group %"PRIu32, + processors[processor_id].apic_id, group_processor_id, group_id); + + /* Set SMT ID (assume logical processors within the core are reported in APIC order) */ + processors[processor_id].smt_id = smt_id++; + processors[processor_id].core = (const struct cpuinfo_core*) NULL + core_id; + + /* Reset the lowest bit in affinity mask */ + group_processors_mask &= (group_processors_mask - 1); + } + } + } + + cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, cores_count * sizeof(struct cpuinfo_core)); + if (cores == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", + cores_count * sizeof(struct cpuinfo_core), cores_count); + goto cleanup; + } + + clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_cluster)); + if (clusters == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters", + packages_count * sizeof(struct cpuinfo_cluster), packages_count); + goto cleanup; + } + + packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_package)); + if (packages == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages", + packages_count * sizeof(struct cpuinfo_package), packages_count); + goto cleanup; + } + + for (uint32_t i = processors_count; i != 0; i--) { + const uint32_t processor_id = i - 1; + struct cpuinfo_processor* processor = processors + processor_id; + + /* Adjust core and package pointers for all logical processors */ + struct cpuinfo_core* core = + (struct cpuinfo_core*) ((uintptr_t) cores + (uintptr_t) processor->core); + processor->core = core; + struct cpuinfo_cluster* cluster = + (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster); + processor->cluster = cluster; + struct cpuinfo_package* package = + (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package); + processor->package = package; + + /* This can be overwritten by lower-index processors on the same package */ + package->processor_start = processor_id; + package->processor_count += 1; + + /* This can be overwritten by lower-index processors on the same cluster */ + cluster->processor_start = processor_id; + cluster->processor_count += 1; + + /* This can be overwritten by lower-index processors on the same core*/ + core->processor_start = processor_id; + core->processor_count += 1; + } + + /* Set vendor/uarch/CPUID 
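As an aside (illustrative only, not part of the patch): the group-mask loops in src/x86/windows/init.c above walk the set bits of a KAFFINITY by repeatedly clearing the lowest set bit. The same pattern in isolation, in plain C without Windows headers, looks like this sketch:

/* Sketch only: enumerate set bits of an affinity-style mask. */
#include <stdint.h>
#include <stdio.h>

int main(void) {
	uint64_t mask = UINT64_C(0x2D); /* example mask: bits 0, 2, 3 and 5 set */
	while (mask != 0) {
		/* Find the lowest set bit; init.c uses _BitScanForward/_BitScanForward64 here. */
		uint32_t index = 0;
		while (((mask >> index) & UINT64_C(1)) == 0) {
			index++;
		}
		printf("logical processor %u is in this mask\n", (unsigned) index);
		/* Clear the lowest set bit, mirroring group_processors_mask &= (mask - 1). */
		mask &= (mask - 1);
	}
	return 0;
}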
information for cores */ + for (uint32_t i = cores_count; i != 0; i--) { + const uint32_t global_core_id = i - 1; + struct cpuinfo_core* core = cores + global_core_id; + const struct cpuinfo_processor* processor = processors + core->processor_start; + struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package; + struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster; + + core->cluster = cluster; + core->package = package; + core->core_id = core_bits_mask & + (processor->apic_id >> x86_processor.topology.core_bits_offset); + core->vendor = x86_processor.vendor; + core->uarch = x86_processor.uarch; + core->cpuid = x86_processor.cpuid; + + /* This can be overwritten by lower-index cores on the same cluster/package */ + cluster->core_start = global_core_id; + cluster->core_count += 1; + package->core_start = global_core_id; + package->core_count += 1; + } + + for (uint32_t i = 0; i < packages_count; i++) { + struct cpuinfo_package* package = packages + i; + struct cpuinfo_cluster* cluster = clusters + i; + + cluster->package = package; + cluster->vendor = cores[cluster->core_start].vendor; + cluster->uarch = cores[cluster->core_start].uarch; + cluster->cpuid = cores[cluster->core_start].cpuid; + package->cluster_start = i; + package->cluster_count = 1; + cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, package->name); + } + + /* Count caches */ + uint32_t l1i_count, l1d_count, l2_count, l3_count, l4_count; + cpuinfo_x86_count_caches(processors_count, processors, &x86_processor, + &l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count); + + /* Allocate cache descriptions */ + if (l1i_count != 0) { + l1i = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1i_count * sizeof(struct cpuinfo_cache)); + if (l1i == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", + l1i_count * sizeof(struct cpuinfo_cache), l1i_count); + goto cleanup; + } + } + if (l1d_count != 0) { + l1d = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1d_count * sizeof(struct cpuinfo_cache)); + if (l1d == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", + l1d_count * sizeof(struct cpuinfo_cache), l1d_count); + goto cleanup; + } + } + if (l2_count != 0) { + l2 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l2_count * sizeof(struct cpuinfo_cache)); + if (l2 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", + l2_count * sizeof(struct cpuinfo_cache), l2_count); + goto cleanup; + } + } + if (l3_count != 0) { + l3 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l3_count * sizeof(struct cpuinfo_cache)); + if (l3 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", + l3_count * sizeof(struct cpuinfo_cache), l3_count); + goto cleanup; + } + } + if (l4_count != 0) { + l4 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l4_count * sizeof(struct cpuinfo_cache)); + if (l4 == NULL) { + cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches", + l4_count * sizeof(struct cpuinfo_cache), l4_count); + goto cleanup; + } + } + + /* Set cache information */ + uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX, l4_index = UINT32_MAX; + uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX; + uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX; + for (uint32_t i = 0; i < processors_count; i++) { + const uint32_t apic_id = 
processors[i].apic_id; + + if (x86_processor.cache.l1i.size != 0) { + const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits); + processors[i].cache.l1i = &l1i[l1i_index]; + if (l1i_id != last_l1i_id) { + /* new cache */ + last_l1i_id = l1i_id; + l1i[++l1i_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l1i.size, + .associativity = x86_processor.cache.l1i.associativity, + .sets = x86_processor.cache.l1i.sets, + .partitions = x86_processor.cache.l1i.partitions, + .line_size = x86_processor.cache.l1i.line_size, + .flags = x86_processor.cache.l1i.flags, + .processor_start = i, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l1i[l1i_index].processor_count += 1; + } + processors[i].cache.l1i = &l1i[l1i_index]; + } else { + /* reset cache id */ + last_l1i_id = UINT32_MAX; + } + if (x86_processor.cache.l1d.size != 0) { + const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits); + processors[i].cache.l1d = &l1d[l1d_index]; + if (l1d_id != last_l1d_id) { + /* new cache */ + last_l1d_id = l1d_id; + l1d[++l1d_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l1d.size, + .associativity = x86_processor.cache.l1d.associativity, + .sets = x86_processor.cache.l1d.sets, + .partitions = x86_processor.cache.l1d.partitions, + .line_size = x86_processor.cache.l1d.line_size, + .flags = x86_processor.cache.l1d.flags, + .processor_start = i, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l1d[l1d_index].processor_count += 1; + } + processors[i].cache.l1d = &l1d[l1d_index]; + } else { + /* reset cache id */ + last_l1d_id = UINT32_MAX; + } + if (x86_processor.cache.l2.size != 0) { + const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits); + processors[i].cache.l2 = &l2[l2_index]; + if (l2_id != last_l2_id) { + /* new cache */ + last_l2_id = l2_id; + l2[++l2_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l2.size, + .associativity = x86_processor.cache.l2.associativity, + .sets = x86_processor.cache.l2.sets, + .partitions = x86_processor.cache.l2.partitions, + .line_size = x86_processor.cache.l2.line_size, + .flags = x86_processor.cache.l2.flags, + .processor_start = i, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l2[l2_index].processor_count += 1; + } + processors[i].cache.l2 = &l2[l2_index]; + } else { + /* reset cache id */ + last_l2_id = UINT32_MAX; + } + if (x86_processor.cache.l3.size != 0) { + const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits); + processors[i].cache.l3 = &l3[l3_index]; + if (l3_id != last_l3_id) { + /* new cache */ + last_l3_id = l3_id; + l3[++l3_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l3.size, + .associativity = x86_processor.cache.l3.associativity, + .sets = x86_processor.cache.l3.sets, + .partitions = x86_processor.cache.l3.partitions, + .line_size = x86_processor.cache.l3.line_size, + .flags = x86_processor.cache.l3.flags, + .processor_start = i, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l3[l3_index].processor_count += 1; + } + processors[i].cache.l3 = &l3[l3_index]; + } else { + /* reset cache id */ + last_l3_id = UINT32_MAX; + } + if (x86_processor.cache.l4.size != 0) { + const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits); + processors[i].cache.l4 = &l4[l4_index]; + if (l4_id != last_l4_id) { + /* new cache */ + last_l4_id = 
l4_id; + l4[++l4_index] = (struct cpuinfo_cache) { + .size = x86_processor.cache.l4.size, + .associativity = x86_processor.cache.l4.associativity, + .sets = x86_processor.cache.l4.sets, + .partitions = x86_processor.cache.l4.partitions, + .line_size = x86_processor.cache.l4.line_size, + .flags = x86_processor.cache.l4.flags, + .processor_start = i, + .processor_count = 1, + }; + } else { + /* another processor sharing the same cache */ + l4[l4_index].processor_count += 1; + } + processors[i].cache.l4 = &l4[l4_index]; + } else { + /* reset cache id */ + last_l4_id = UINT32_MAX; + } + } + + + /* Commit changes */ + cpuinfo_processors = processors; + cpuinfo_cores = cores; + cpuinfo_clusters = clusters; + cpuinfo_packages = packages; + cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; + cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; + cpuinfo_cache[cpuinfo_cache_level_2] = l2; + cpuinfo_cache[cpuinfo_cache_level_3] = l3; + cpuinfo_cache[cpuinfo_cache_level_4] = l4; + + cpuinfo_processors_count = processors_count; + cpuinfo_cores_count = cores_count; + cpuinfo_clusters_count = packages_count; + cpuinfo_packages_count = packages_count; + cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count; + cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count; + cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; + cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; + cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count; + cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]); + + cpuinfo_global_uarch = (struct cpuinfo_uarch_info) { + .uarch = x86_processor.uarch, + .cpuid = x86_processor.cpuid, + .processor_count = processors_count, + .core_count = cores_count, + }; + + MemoryBarrier(); + + cpuinfo_is_initialized = true; + + processors = NULL; + cores = NULL; + clusters = NULL; + packages = NULL; + l1i = l1d = l2 = l3 = l4 = NULL; + +cleanup: + if (processors != NULL) { + HeapFree(heap, 0, processors); + } + if (cores != NULL) { + HeapFree(heap, 0, cores); + } + if (clusters != NULL) { + HeapFree(heap, 0, clusters); + } + if (packages != NULL) { + HeapFree(heap, 0, packages); + } + if (l1i != NULL) { + HeapFree(heap, 0, l1i); + } + if (l1d != NULL) { + HeapFree(heap, 0, l1d); + } + if (l2 != NULL) { + HeapFree(heap, 0, l2); + } + if (l3 != NULL) { + HeapFree(heap, 0, l3); + } + if (l4 != NULL) { + HeapFree(heap, 0, l4); + } + return TRUE; +} diff --git a/duckstation.sln b/duckstation.sln index e1e255834..28a95da6a 100644 --- a/duckstation.sln +++ b/duckstation.sln @@ -111,6 +111,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "soundtouch", "dep\soundtouc EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "dep\zstd\zstd.vcxproj", "{73EE0C55-6FFE-44E7-9C12-BAA52434A797}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpuinfo", "dep\cpuinfo\cpuinfo.vcxproj", "{EE55AA65-EA6B-4861-810B-78354B53A807}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM64 = Debug|ARM64 @@ -1029,6 +1031,42 @@ Global {73EE0C55-6FFE-44E7-9C12-BAA52434A797}.ReleaseUWP|x64.Build.0 = ReleaseUWP|x64 {73EE0C55-6FFE-44E7-9C12-BAA52434A797}.ReleaseUWP|x86.ActiveCfg = ReleaseUWP|Win32 {73EE0C55-6FFE-44E7-9C12-BAA52434A797}.ReleaseUWP|x86.Build.0 = ReleaseUWP|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Debug|ARM64.Build.0 = Debug|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Debug|x64.ActiveCfg = Debug|x64 + 
{EE55AA65-EA6B-4861-810B-78354B53A807}.Debug|x64.Build.0 = Debug|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Debug|x86.ActiveCfg = Debug|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Debug|x86.Build.0 = Debug|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugFast|ARM64.ActiveCfg = DebugFast|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugFast|ARM64.Build.0 = DebugFast|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugFast|x64.ActiveCfg = DebugFast|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugFast|x64.Build.0 = DebugFast|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugFast|x86.ActiveCfg = DebugFast|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugFast|x86.Build.0 = DebugFast|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugUWP|ARM64.ActiveCfg = DebugFast|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugUWP|ARM64.Build.0 = DebugFast|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugUWP|x64.ActiveCfg = DebugFast|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugUWP|x64.Build.0 = DebugFast|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugUWP|x86.ActiveCfg = DebugFast|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.DebugUWP|x86.Build.0 = DebugFast|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Release|ARM64.ActiveCfg = Release|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Release|ARM64.Build.0 = Release|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Release|x64.ActiveCfg = Release|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Release|x64.Build.0 = Release|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Release|x86.ActiveCfg = Release|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.Release|x86.Build.0 = Release|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseLTCG|ARM64.ActiveCfg = ReleaseLTCG|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseLTCG|ARM64.Build.0 = ReleaseLTCG|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseLTCG|x64.ActiveCfg = ReleaseLTCG|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseLTCG|x64.Build.0 = ReleaseLTCG|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseLTCG|x86.ActiveCfg = ReleaseLTCG|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseLTCG|x86.Build.0 = ReleaseLTCG|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseUWP|ARM64.ActiveCfg = ReleaseLTCG|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseUWP|ARM64.Build.0 = ReleaseLTCG|ARM64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseUWP|x64.ActiveCfg = ReleaseLTCG|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseUWP|x64.Build.0 = ReleaseLTCG|x64 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseUWP|x86.ActiveCfg = ReleaseLTCG|Win32 + {EE55AA65-EA6B-4861-810B-78354B53A807}.ReleaseUWP|x86.Build.0 = ReleaseLTCG|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -1054,6 +1092,7 @@ Global {8BE398E6-B882-4248-9065-FECC8728E038} = {BA490C0E-497D-4634-A21E-E65012006385} {751D9F62-881C-454E-BCE8-CB9CF5F1D22F} = {BA490C0E-497D-4634-A21E-E65012006385} {73EE0C55-6FFE-44E7-9C12-BAA52434A797} = {BA490C0E-497D-4634-A21E-E65012006385} + {EE55AA65-EA6B-4861-810B-78354B53A807} = {BA490C0E-497D-4634-A21E-E65012006385} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {26E40B32-7C1D-48D0-95F4-1A500E054028}
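As a closing aside (illustrative only, not part of the patch): with the cpuinfo project now wired into the build, consumers use the library through its public header. A minimal sketch, assuming the standard public cpuinfo API (cpuinfo_initialize() and related accessors):

/* Sketch only: querying the data populated by the initialization code added above. */
#include <inttypes.h>
#include <stdio.h>
#include <cpuinfo.h>

int main(void) {
	if (!cpuinfo_initialize()) {
		fprintf(stderr, "cpuinfo_initialize() failed\n");
		return 1;
	}
	printf("packages:   %" PRIu32 "\n", cpuinfo_get_packages_count());
	printf("cores:      %" PRIu32 "\n", cpuinfo_get_cores_count());
	printf("processors: %" PRIu32 "\n", cpuinfo_get_processors_count());
	printf("package 0:  %s\n", cpuinfo_get_package(0)->name);
	cpuinfo_deinitialize();
	return 0;
}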