mirror of
https://github.com/WinampDesktop/winamp.git
synced 2025-06-18 02:15:46 -04:00
dep: Add cpuinfo
This commit is contained in:
159
dep/cpuinfo/src/x86/api.h
Normal file
159
dep/cpuinfo/src/x86/api.h
Normal file
@ -0,0 +1,159 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <cpuinfo/common.h>
|
||||
|
||||
|
||||
/* Raw output of one CPUID invocation: one 32-bit value per register. */
struct cpuid_regs {
	uint32_t eax;
	uint32_t ebx;
	uint32_t ecx;
	uint32_t edx;
};
|
||||
|
||||
/*
 * One x86 cache level as decoded from CPUID.
 * size is computed as associativity * partitions * line_size * sets
 * by the decode functions below.
 */
struct cpuinfo_x86_cache {
	uint32_t size;          /* total capacity in bytes */
	uint32_t associativity; /* ways of associativity */
	uint32_t sets;
	uint32_t partitions;
	uint32_t line_size;     /* cache line size in bytes */
	uint32_t flags;         /* CPUINFO_CACHE_* bit mask (UNIFIED, INCLUSIVE, ...) */
	uint32_t apic_bits;     /* low APIC ID bits spanning the set of logical
	                           processors that share this cache (bit_length of
	                           the reported sharing count) */
};
|
||||
|
||||
/* Full cache hierarchy of one processor, as filled by cpuinfo_x86_detect_cache. */
struct cpuinfo_x86_caches {
	struct cpuinfo_trace_cache trace; /* trace cache (decoded micro-op cache) */
	struct cpuinfo_x86_cache l1i;     /* L1 instruction cache */
	struct cpuinfo_x86_cache l1d;     /* L1 data cache */
	struct cpuinfo_x86_cache l2;
	struct cpuinfo_x86_cache l3;
	struct cpuinfo_x86_cache l4;
	uint32_t prefetch_size;           /* prefetch stride from leaf-2 descriptors */
};
|
||||
|
||||
/*
 * Decomposition of the leaf-1 EAX processor signature.
 * model and family are the combined values (base + extended fields);
 * the remaining members are the raw bit fields.
 */
struct cpuinfo_x86_model_info {
	uint32_t model;           /* base_model + (extended_model << 4) */
	uint32_t family;          /* base_family + extended_family */

	uint32_t base_model;      /* EAX[7:4] */
	uint32_t base_family;     /* EAX[11:8] */
	uint32_t stepping;        /* EAX[3:0] */
	uint32_t extended_model;  /* EAX[19:16] */
	uint32_t extended_family; /* EAX[27:20] */
	uint32_t processor_type;  /* EAX[13:12] */
};
|
||||
|
||||
/*
 * Location of one logical processor in the package topology:
 * the APIC ID plus the positions of the SMT-thread and core bit fields
 * within it.
 */
struct cpuinfo_x86_topology {
	uint32_t apic_id;
	uint32_t thread_bits_offset;
	uint32_t thread_bits_length;
	uint32_t core_bits_offset;
	uint32_t core_bits_length;
};
|
||||
|
||||
/* Everything cpuinfo_x86_init_processor discovers about one processor. */
struct cpuinfo_x86_processor {
	uint32_t cpuid;             /* raw leaf-1 EAX signature */
	enum cpuinfo_vendor vendor;
	enum cpuinfo_uarch uarch;
#ifdef __linux__
	int linux_id;               /* index of this processor in the Linux kernel's view */
#endif
	struct cpuinfo_x86_caches cache;
	/* TLBs keyed by kind (instruction/data/shared L2) and page size. */
	struct {
		struct cpuinfo_tlb itlb_4KB;
		struct cpuinfo_tlb itlb_2MB;
		struct cpuinfo_tlb itlb_4MB;
		struct cpuinfo_tlb dtlb0_4KB;
		struct cpuinfo_tlb dtlb0_2MB;
		struct cpuinfo_tlb dtlb0_4MB;
		struct cpuinfo_tlb dtlb_4KB;
		struct cpuinfo_tlb dtlb_2MB;
		struct cpuinfo_tlb dtlb_4MB;
		struct cpuinfo_tlb dtlb_1GB;
		struct cpuinfo_tlb stlb2_4KB;
		struct cpuinfo_tlb stlb2_2MB;
		struct cpuinfo_tlb stlb2_1GB;
	} tlb;
	struct cpuinfo_x86_topology topology;
	/* Raw brand string from leaves 0x80000002-0x80000004 (48 significant bytes). */
	char brand_string[CPUINFO_PACKAGE_NAME_MAX];
};
|
||||
|
||||
/* Populate all fields of the processor structure via CPUID queries. */
CPUINFO_INTERNAL void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor);

/* Map the 12-byte vendor string in leaf-0 EBX/EDX/ECX to a vendor enum. */
CPUINFO_INTERNAL enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx);
/* Split the leaf-1 EAX signature into stepping/model/family fields. */
CPUINFO_INTERNAL struct cpuinfo_x86_model_info cpuinfo_x86_decode_model_info(uint32_t eax);
/* Identify the microarchitecture from vendor + model information. */
CPUINFO_INTERNAL enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
	enum cpuinfo_vendor vendor,
	const struct cpuinfo_x86_model_info* model_info);

/* Detect supported instruction-set extensions from CPUID feature leaves. */
CPUINFO_INTERNAL struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
	const struct cpuid_regs basic_info, const struct cpuid_regs extended_info,
	uint32_t max_base_index, uint32_t max_extended_index,
	enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch);

/* Read the APIC ID and the thread/core bit-field layout into topology. */
CPUINFO_INTERNAL void cpuinfo_x86_detect_topology(
	uint32_t max_base_index,
	uint32_t max_extended_index,
	struct cpuid_regs leaf1,
	struct cpuinfo_x86_topology* topology);

/*
 * Enumerate all caches and TLBs via leaf 2 descriptors, leaf 4
 * (Intel-compatible), and leaf 0x8000001D (AMD topology extensions).
 */
CPUINFO_INTERNAL void cpuinfo_x86_detect_cache(
	uint32_t max_base_index, uint32_t max_extended_index,
	bool amd_topology_extensions,
	enum cpuinfo_vendor vendor,
	const struct cpuinfo_x86_model_info* model_info,
	struct cpuinfo_x86_caches* cache,
	struct cpuinfo_tlb* itlb_4KB,
	struct cpuinfo_tlb* itlb_2MB,
	struct cpuinfo_tlb* itlb_4MB,
	struct cpuinfo_tlb* dtlb0_4KB,
	struct cpuinfo_tlb* dtlb0_2MB,
	struct cpuinfo_tlb* dtlb0_4MB,
	struct cpuinfo_tlb* dtlb_4KB,
	struct cpuinfo_tlb* dtlb_2MB,
	struct cpuinfo_tlb* dtlb_4MB,
	struct cpuinfo_tlb* dtlb_1GB,
	struct cpuinfo_tlb* stlb2_4KB,
	struct cpuinfo_tlb* stlb2_2MB,
	struct cpuinfo_tlb* stlb2_1GB,
	uint32_t* log2_package_cores_max);

/* Decode a single leaf-2 cache/TLB descriptor byte into the matching entry. */
CPUINFO_INTERNAL void cpuinfo_x86_decode_cache_descriptor(
	uint8_t descriptor, enum cpuinfo_vendor vendor,
	const struct cpuinfo_x86_model_info* model_info,
	struct cpuinfo_x86_caches* cache,
	struct cpuinfo_tlb* itlb_4KB,
	struct cpuinfo_tlb* itlb_2MB,
	struct cpuinfo_tlb* itlb_4MB,
	struct cpuinfo_tlb* dtlb0_4KB,
	struct cpuinfo_tlb* dtlb0_2MB,
	struct cpuinfo_tlb* dtlb0_4MB,
	struct cpuinfo_tlb* dtlb_4KB,
	struct cpuinfo_tlb* dtlb_2MB,
	struct cpuinfo_tlb* dtlb_4MB,
	struct cpuinfo_tlb* dtlb_1GB,
	struct cpuinfo_tlb* stlb2_4KB,
	struct cpuinfo_tlb* stlb2_2MB,
	struct cpuinfo_tlb* stlb2_1GB,
	uint32_t* prefetch_size);

/* Decode one subleaf of CPUID leaf 4; returns false at end of enumeration. */
CPUINFO_INTERNAL bool cpuinfo_x86_decode_deterministic_cache_parameters(
	struct cpuid_regs regs,
	struct cpuinfo_x86_caches* cache,
	uint32_t* package_cores_max);

/* Decode one subleaf of CPUID leaf 0x8000001D; returns false at end of enumeration. */
CPUINFO_INTERNAL bool cpuinfo_x86_decode_cache_properties(
	struct cpuid_regs regs,
	struct cpuinfo_x86_caches* cache);

/* Normalize the raw 48-char brand string (presumably trims padding/boilerplate
 * — see implementation); returns the normalized length. */
CPUINFO_INTERNAL uint32_t cpuinfo_x86_normalize_brand_string(
	const char raw_name[48],
	char normalized_name[48]);

/* Format the user-visible package name from vendor + normalized brand string;
 * returns the formatted length. */
CPUINFO_INTERNAL uint32_t cpuinfo_x86_format_package_name(
	enum cpuinfo_vendor vendor,
	const char normalized_brand_string[48],
	char package_name[CPUINFO_PACKAGE_NAME_MAX]);
|
1726
dep/cpuinfo/src/x86/cache/descriptor.c
vendored
Normal file
1726
dep/cpuinfo/src/x86/cache/descriptor.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
257
dep/cpuinfo/src/x86/cache/deterministic.c
vendored
Normal file
257
dep/cpuinfo/src/x86/cache/deterministic.c
vendored
Normal file
@ -0,0 +1,257 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/cpuid.h>
|
||||
#include <cpuinfo/utils.h>
|
||||
#include <cpuinfo/log.h>
|
||||
|
||||
|
||||
/* Cache type field (low bits of EAX) in CPUID leaves 4 / 0x8000001D. */
enum cache_type {
	cache_type_none = 0,        /* no more caches: terminates enumeration */
	cache_type_data = 1,
	cache_type_instruction = 2,
	cache_type_unified = 3,
};
|
||||
|
||||
bool cpuinfo_x86_decode_deterministic_cache_parameters(
|
||||
struct cpuid_regs regs,
|
||||
struct cpuinfo_x86_caches* cache,
|
||||
uint32_t* package_cores_max)
|
||||
{
|
||||
const uint32_t type = regs.eax & UINT32_C(0x1F);
|
||||
if (type == cache_type_none) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Level starts at 1 */
|
||||
const uint32_t level = (regs.eax >> 5) & UINT32_C(0x7);
|
||||
|
||||
const uint32_t sets = 1 + regs.ecx;
|
||||
const uint32_t line_size = 1 + (regs.ebx & UINT32_C(0x00000FFF));
|
||||
const uint32_t partitions = 1 + ((regs.ebx >> 12) & UINT32_C(0x000003FF));
|
||||
const uint32_t associativity = 1 + (regs.ebx >> 22);
|
||||
|
||||
*package_cores_max = 1 + (regs.eax >> 26);
|
||||
const uint32_t processors = 1 + ((regs.eax >> 14) & UINT32_C(0x00000FFF));
|
||||
const uint32_t apic_bits = bit_length(processors);
|
||||
|
||||
uint32_t flags = 0;
|
||||
if (regs.edx & UINT32_C(0x00000002)) {
|
||||
flags |= CPUINFO_CACHE_INCLUSIVE;
|
||||
}
|
||||
if (regs.edx & UINT32_C(0x00000004)) {
|
||||
flags |= CPUINFO_CACHE_COMPLEX_INDEXING;
|
||||
}
|
||||
switch (level) {
|
||||
case 1:
|
||||
switch (type) {
|
||||
case cache_type_unified:
|
||||
cache->l1d = cache->l1i = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags | CPUINFO_CACHE_UNIFIED,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
case cache_type_data:
|
||||
cache->l1d = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
case cache_type_instruction:
|
||||
cache->l1i = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (type) {
|
||||
case cache_type_instruction:
|
||||
cpuinfo_log_warning("unexpected L2 instruction cache reported in leaf 0x00000004 is ignored");
|
||||
break;
|
||||
case cache_type_unified:
|
||||
flags |= CPUINFO_CACHE_UNIFIED;
|
||||
case cache_type_data:
|
||||
cache->l2 = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (type) {
|
||||
case cache_type_instruction:
|
||||
cpuinfo_log_warning("unexpected L3 instruction cache reported in leaf 0x00000004 is ignored");
|
||||
break;
|
||||
case cache_type_unified:
|
||||
flags |= CPUINFO_CACHE_UNIFIED;
|
||||
case cache_type_data:
|
||||
cache->l3 = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
switch (type) {
|
||||
case cache_type_instruction:
|
||||
cpuinfo_log_warning("unexpected L4 instruction cache reported in leaf 0x00000004 is ignored");
|
||||
break;
|
||||
case cache_type_unified:
|
||||
flags |= CPUINFO_CACHE_UNIFIED;
|
||||
case cache_type_data:
|
||||
cache->l4 = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cpuinfo_log_warning("unexpected L%"PRIu32" cache reported in leaf 0x00000004 is ignored", level);
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool cpuinfo_x86_decode_cache_properties(
|
||||
struct cpuid_regs regs,
|
||||
struct cpuinfo_x86_caches* cache)
|
||||
{
|
||||
const uint32_t type = regs.eax & UINT32_C(0x1F);
|
||||
if (type == cache_type_none) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const uint32_t level = (regs.eax >> 5) & UINT32_C(0x7);
|
||||
const uint32_t cores = 1 + ((regs.eax >> 14) & UINT32_C(0x00000FFF));
|
||||
const uint32_t apic_bits = bit_length(cores);
|
||||
|
||||
const uint32_t sets = 1 + regs.ecx;
|
||||
const uint32_t line_size = 1 + (regs.ebx & UINT32_C(0x00000FFF));
|
||||
const uint32_t partitions = 1 + ((regs.ebx >> 12) & UINT32_C(0x000003FF));
|
||||
const uint32_t associativity = 1 + (regs.ebx >> 22);
|
||||
|
||||
uint32_t flags = 0;
|
||||
if (regs.edx & UINT32_C(0x00000002)) {
|
||||
flags |= CPUINFO_CACHE_INCLUSIVE;
|
||||
}
|
||||
|
||||
switch (level) {
|
||||
case 1:
|
||||
switch (type) {
|
||||
case cache_type_unified:
|
||||
cache->l1d = cache->l1i = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags | CPUINFO_CACHE_UNIFIED,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
case cache_type_data:
|
||||
cache->l1d = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
case cache_type_instruction:
|
||||
cache->l1i = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (type) {
|
||||
case cache_type_instruction:
|
||||
cpuinfo_log_warning("unexpected L2 instruction cache reported in leaf 0x8000001D is ignored");
|
||||
break;
|
||||
case cache_type_unified:
|
||||
flags |= CPUINFO_CACHE_UNIFIED;
|
||||
case cache_type_data:
|
||||
cache->l2 = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (type) {
|
||||
case cache_type_instruction:
|
||||
cpuinfo_log_warning("unexpected L3 instruction cache reported in leaf 0x8000001D is ignored");
|
||||
break;
|
||||
case cache_type_unified:
|
||||
flags |= CPUINFO_CACHE_UNIFIED;
|
||||
case cache_type_data:
|
||||
cache->l3 = (struct cpuinfo_x86_cache) {
|
||||
.size = associativity * partitions * line_size * sets,
|
||||
.associativity = associativity,
|
||||
.sets = sets,
|
||||
.partitions = partitions,
|
||||
.line_size = line_size,
|
||||
.flags = flags,
|
||||
.apic_bits = apic_bits
|
||||
};
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cpuinfo_log_warning("unexpected L%"PRIu32" cache reported in leaf 0x8000001D is ignored", level);
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
88
dep/cpuinfo/src/x86/cache/init.c
vendored
Normal file
88
dep/cpuinfo/src/x86/cache/init.c
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <cpuinfo/utils.h>
|
||||
#include <cpuinfo/log.h>
|
||||
#include <x86/cpuid.h>
|
||||
#include <x86/api.h>
|
||||
|
||||
|
||||
/*
 * CPUID leaf 2 packs one descriptor byte per register byte; this union lets
 * the decoder walk the four registers as a flat 16-byte array.
 */
union cpuinfo_x86_cache_descriptors {
	struct cpuid_regs regs;
	uint8_t as_bytes[16];
};
|
||||
|
||||
/* Cache type field (low bits of EAX) in CPUID leaves 4 / 0x8000001D. */
enum cache_type {
	cache_type_none = 0,        /* no more caches: terminates enumeration */
	cache_type_data = 1,
	cache_type_instruction = 2,
	cache_type_unified = 3,
};
|
||||
|
||||
void cpuinfo_x86_detect_cache(
|
||||
uint32_t max_base_index, uint32_t max_extended_index,
|
||||
bool amd_topology_extensions,
|
||||
enum cpuinfo_vendor vendor,
|
||||
const struct cpuinfo_x86_model_info* model_info,
|
||||
struct cpuinfo_x86_caches* cache,
|
||||
struct cpuinfo_tlb* itlb_4KB,
|
||||
struct cpuinfo_tlb* itlb_2MB,
|
||||
struct cpuinfo_tlb* itlb_4MB,
|
||||
struct cpuinfo_tlb* dtlb0_4KB,
|
||||
struct cpuinfo_tlb* dtlb0_2MB,
|
||||
struct cpuinfo_tlb* dtlb0_4MB,
|
||||
struct cpuinfo_tlb* dtlb_4KB,
|
||||
struct cpuinfo_tlb* dtlb_2MB,
|
||||
struct cpuinfo_tlb* dtlb_4MB,
|
||||
struct cpuinfo_tlb* dtlb_1GB,
|
||||
struct cpuinfo_tlb* stlb2_4KB,
|
||||
struct cpuinfo_tlb* stlb2_2MB,
|
||||
struct cpuinfo_tlb* stlb2_1GB,
|
||||
uint32_t* log2_package_cores_max)
|
||||
{
|
||||
if (max_base_index >= 2) {
|
||||
union cpuinfo_x86_cache_descriptors descriptors;
|
||||
descriptors.regs = cpuid(2);
|
||||
uint32_t iterations = (uint8_t) descriptors.as_bytes[0];
|
||||
if (iterations != 0) {
|
||||
iterate_descriptors:
|
||||
for (uint32_t i = 1 /* note: not 0 */; i < 16; i++) {
|
||||
const uint8_t descriptor = descriptors.as_bytes[i];
|
||||
if (descriptor != 0) {
|
||||
cpuinfo_x86_decode_cache_descriptor(
|
||||
descriptor, vendor, model_info,
|
||||
cache,
|
||||
itlb_4KB, itlb_2MB, itlb_4MB,
|
||||
dtlb0_4KB, dtlb0_2MB, dtlb0_4MB,
|
||||
dtlb_4KB, dtlb_2MB, dtlb_4MB, dtlb_1GB,
|
||||
stlb2_4KB, stlb2_2MB, stlb2_1GB,
|
||||
&cache->prefetch_size);
|
||||
}
|
||||
}
|
||||
if (--iterations != 0) {
|
||||
descriptors.regs = cpuid(2);
|
||||
goto iterate_descriptors;
|
||||
}
|
||||
}
|
||||
|
||||
if (vendor != cpuinfo_vendor_amd && vendor != cpuinfo_vendor_hygon && max_base_index >= 4) {
|
||||
struct cpuid_regs leaf4;
|
||||
uint32_t input_ecx = 0;
|
||||
uint32_t package_cores_max = 0;
|
||||
do {
|
||||
leaf4 = cpuidex(4, input_ecx++);
|
||||
} while (cpuinfo_x86_decode_deterministic_cache_parameters(
|
||||
leaf4, cache, &package_cores_max));
|
||||
if (package_cores_max != 0) {
|
||||
*log2_package_cores_max = bit_length(package_cores_max);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (amd_topology_extensions && max_extended_index >= UINT32_C(0x8000001D)) {
|
||||
struct cpuid_regs leaf0x8000001D;
|
||||
uint32_t input_ecx = 0;
|
||||
do {
|
||||
leaf0x8000001D = cpuidex(UINT32_C(0x8000001D), input_ecx++);
|
||||
} while (cpuinfo_x86_decode_cache_properties(leaf0x8000001D, cache));
|
||||
}
|
||||
}
|
79
dep/cpuinfo/src/x86/cpuid.h
Normal file
79
dep/cpuinfo/src/x86/cpuid.h
Normal file
@ -0,0 +1,79 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#include <cpuid.h>
|
||||
#elif defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if CPUINFO_MOCK
|
||||
#include <cpuinfo-mock.h>
|
||||
#endif
|
||||
#include <x86/api.h>
|
||||
|
||||
|
||||
#if defined(__GNUC__) || defined(_MSC_VER)
	/*
	 * Query CPUID leaf `eax` (subleaf selection left to the intrinsic).
	 * Under CPUINFO_MOCK the values come from pre-recorded dumps instead of
	 * executing the instruction.
	 */
	static inline struct cpuid_regs cpuid(uint32_t eax) {
		#if CPUINFO_MOCK
			uint32_t regs_array[4];
			cpuinfo_mock_get_cpuid(eax, regs_array);
			return (struct cpuid_regs) {
				.eax = regs_array[0],
				.ebx = regs_array[1],
				.ecx = regs_array[2],
				.edx = regs_array[3],
			};
		#else
			struct cpuid_regs regs;
			#if defined(__GNUC__)
				/* GCC/Clang <cpuid.h> macro: outputs go to the four lvalues. */
				__cpuid(eax, regs.eax, regs.ebx, regs.ecx, regs.edx);
			#else
				/* MSVC intrinsic fills an int[4] in EAX/EBX/ECX/EDX order. */
				int regs_array[4];
				__cpuid(regs_array, (int) eax);
				regs.eax = regs_array[0];
				regs.ebx = regs_array[1];
				regs.ecx = regs_array[2];
				regs.edx = regs_array[3];
			#endif
			return regs;
		#endif
	}

	/* Query CPUID leaf `eax` with explicit subleaf `ecx` (e.g. leaves 4, 7, 0xD). */
	static inline struct cpuid_regs cpuidex(uint32_t eax, uint32_t ecx) {
		#if CPUINFO_MOCK
			uint32_t regs_array[4];
			cpuinfo_mock_get_cpuidex(eax, ecx, regs_array);
			return (struct cpuid_regs) {
				.eax = regs_array[0],
				.ebx = regs_array[1],
				.ecx = regs_array[2],
				.edx = regs_array[3],
			};
		#else
			struct cpuid_regs regs;
			#if defined(__GNUC__)
				__cpuid_count(eax, ecx, regs.eax, regs.ebx, regs.ecx, regs.edx);
			#else
				int regs_array[4];
				__cpuidex(regs_array, (int) eax, (int) ecx);
				regs.eax = regs_array[0];
				regs.ebx = regs_array[1];
				regs.ecx = regs_array[2];
				regs.edx = regs_array[3];
			#endif
			return regs;
		#endif
	}
#endif
|
||||
|
||||
/*
 * Read extended control register `ext_ctrl_reg` (0 = XCR0, the
 * XFEATURE_ENABLED_MASK). The caller must verify OSXSAVE support before
 * executing this, or the instruction faults.
 */
static inline uint64_t xgetbv(uint32_t ext_ctrl_reg) {
#ifdef _MSC_VER
	return (uint64_t)_xgetbv((unsigned int)ext_ctrl_reg);
#else
	uint32_t lo, hi;
	/* Raw XGETBV opcode, for assemblers predating the mnemonic;
	 * result is returned in EDX:EAX. */
	__asm__(".byte 0x0F, 0x01, 0xD0" : "=a" (lo), "=d" (hi) : "c" (ext_ctrl_reg));
	return ((uint64_t) hi << 32) | (uint64_t) lo;
#endif
}
|
||||
|
19
dep/cpuinfo/src/x86/info.c
Normal file
19
dep/cpuinfo/src/x86/info.c
Normal file
@ -0,0 +1,19 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
|
||||
|
||||
struct cpuinfo_x86_model_info cpuinfo_x86_decode_model_info(uint32_t eax) {
|
||||
struct cpuinfo_x86_model_info model_info;
|
||||
model_info.stepping = eax & 0xF;
|
||||
model_info.base_model = (eax >> 4) & 0xF;
|
||||
model_info.base_family = (eax >> 8) & 0xF;
|
||||
model_info.processor_type = (eax >> 12) & 0x3;
|
||||
model_info.extended_model = (eax >> 16) & 0xF;
|
||||
model_info.extended_family = (eax >> 20) & 0xFF;
|
||||
|
||||
model_info.family = model_info.base_family + model_info.extended_family;
|
||||
model_info.model = model_info.base_model + (model_info.extended_model << 4);
|
||||
return model_info;
|
||||
}
|
75
dep/cpuinfo/src/x86/init.c
Normal file
75
dep/cpuinfo/src/x86/init.c
Normal file
@ -0,0 +1,75 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/cpuid.h>
|
||||
#include <x86/api.h>
|
||||
#include <cpuinfo/utils.h>
|
||||
#include <cpuinfo/log.h>
|
||||
#include <cpuinfo/common.h>
|
||||
|
||||
|
||||
/* Global ISA feature table, filled in by cpuinfo_x86_init_processor. */
struct cpuinfo_x86_isa cpuinfo_isa = { 0 };
/* CLFLUSH line size in bytes (leaf 1 EBX[15:8] * 8); 0 until initialized. */
CPUINFO_INTERNAL uint32_t cpuinfo_x86_clflush_size = 0;
|
||||
|
||||
/*
 * Populate every field of the processor structure by querying CPUID:
 * vendor, signature, microarchitecture, caches, TLBs, topology, ISA
 * features, and the raw brand string. Also initializes the file-scope
 * globals cpuinfo_isa and cpuinfo_x86_clflush_size.
 */
void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) {
	/* Leaf 0: maximum basic leaf in EAX, 12-byte vendor string in EBX/EDX/ECX. */
	const struct cpuid_regs leaf0 = cpuid(0);
	const uint32_t max_base_index = leaf0.eax;
	const enum cpuinfo_vendor vendor = processor->vendor =
		cpuinfo_x86_decode_vendor(leaf0.ebx, leaf0.ecx, leaf0.edx);

	/* Leaf 0x80000000: maximum extended leaf; only meaningful when the
	 * returned value itself lies in the extended range. */
	const struct cpuid_regs leaf0x80000000 = cpuid(UINT32_C(0x80000000));
	const uint32_t max_extended_index =
		leaf0x80000000.eax >= UINT32_C(0x80000000) ? leaf0x80000000.eax : 0;

	/* Extended feature flags; all-zero when the leaf is unsupported. */
	const struct cpuid_regs leaf0x80000001 = max_extended_index >= UINT32_C(0x80000001) ?
		cpuid(UINT32_C(0x80000001)) : (struct cpuid_regs) { 0, 0, 0, 0 };

	if (max_base_index >= 1) {
		/* Leaf 1: processor signature in EAX, feature flags in ECX/EDX. */
		const struct cpuid_regs leaf1 = cpuid(1);
		processor->cpuid = leaf1.eax;

		const struct cpuinfo_x86_model_info model_info = cpuinfo_x86_decode_model_info(leaf1.eax);
		const enum cpuinfo_uarch uarch = processor->uarch =
			cpuinfo_x86_decode_uarch(vendor, &model_info);

		/* Leaf 1 EBX[15:8] reports the CLFLUSH line size in 8-byte units. */
		cpuinfo_x86_clflush_size = ((leaf1.ebx >> 8) & UINT32_C(0x000000FF)) * 8;

		/*
		 * Topology extensions support:
		 * - AMD: ecx[bit 22] in extended info (reserved bit on Intel CPUs).
		 */
		const bool amd_topology_extensions = !!(leaf0x80000001.ecx & UINT32_C(0x00400000));

		/* NOTE(review): core_bits_length receives log2 of the max package core
		 * count here and cpuinfo_x86_detect_topology below also writes the
		 * topology structure — confirm the intended overwrite order. */
		cpuinfo_x86_detect_cache(
			max_base_index, max_extended_index, amd_topology_extensions, vendor, &model_info,
			&processor->cache,
			&processor->tlb.itlb_4KB,
			&processor->tlb.itlb_2MB,
			&processor->tlb.itlb_4MB,
			&processor->tlb.dtlb0_4KB,
			&processor->tlb.dtlb0_2MB,
			&processor->tlb.dtlb0_4MB,
			&processor->tlb.dtlb_4KB,
			&processor->tlb.dtlb_2MB,
			&processor->tlb.dtlb_4MB,
			&processor->tlb.dtlb_1GB,
			&processor->tlb.stlb2_4KB,
			&processor->tlb.stlb2_2MB,
			&processor->tlb.stlb2_1GB,
			&processor->topology.core_bits_length);

		cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology);

		cpuinfo_isa = cpuinfo_x86_detect_isa(leaf1, leaf0x80000001,
			max_base_index, max_extended_index, vendor, uarch);
	}
	if (max_extended_index >= UINT32_C(0x80000004)) {
		/* Leaves 0x80000002..0x80000004 return the 48-byte brand string
		 * (16 bytes per leaf, EAX/EBX/ECX/EDX in order). */
		struct cpuid_regs brand_string[3];
		for (uint32_t i = 0; i < 3; i++) {
			brand_string[i] = cpuid(UINT32_C(0x80000002) + i);
		}
		/* NOTE(review): copies sizeof(processor->brand_string) bytes from a
		 * 48-byte local; assumes CPUINFO_PACKAGE_NAME_MAX <= 48 — confirm. */
		memcpy(processor->brand_string, brand_string, sizeof(processor->brand_string));
		cpuinfo_log_debug("raw CPUID brand string: \"%48s\"", processor->brand_string);
	}
}
|
724
dep/cpuinfo/src/x86/isa.c
Normal file
724
dep/cpuinfo/src/x86/isa.c
Normal file
@ -0,0 +1,724 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <x86/cpuid.h>
|
||||
#include <cpuinfo.h>
|
||||
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
#ifdef _MSC_VER
|
||||
#pragma pack(push, 2)
|
||||
#endif
|
||||
struct fxsave_region {
|
||||
uint16_t fpu_control_word;
|
||||
uint16_t fpu_status_word;
|
||||
uint16_t fpu_tag_word;
|
||||
uint16_t fpu_opcode;
|
||||
uint32_t fpu_instruction_pointer_offset;
|
||||
uint32_t fpu_instruction_pointer_selector;
|
||||
uint32_t fpu_operand_pointer_offset;
|
||||
uint32_t fpu_operand_pointer_selector;
|
||||
uint32_t mxcsr_state;
|
||||
uint32_t mxcsr_mask;
|
||||
uint64_t fpu_registers[8 * 2];
|
||||
uint64_t xmm_registers[8 * 2];
|
||||
uint64_t padding[28];
|
||||
}
|
||||
#ifndef _MSC_VER
|
||||
__attribute__((__aligned__(16), __packed__))
|
||||
#endif
|
||||
; /* end of fxsave_region structure */
|
||||
#ifdef _MSC_VER
|
||||
#pragma pack(pop, 2)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
|
||||
const struct cpuid_regs basic_info, const struct cpuid_regs extended_info,
|
||||
uint32_t max_base_index, uint32_t max_extended_index,
|
||||
enum cpuinfo_vendor vendor, enum cpuinfo_uarch uarch)
|
||||
{
|
||||
struct cpuinfo_x86_isa isa = { 0 };
|
||||
|
||||
const struct cpuid_regs structured_feature_info0 =
|
||||
(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0};
|
||||
const struct cpuid_regs structured_feature_info1 =
|
||||
(max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0};
|
||||
|
||||
const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
|
||||
const struct cpuid_regs processor_capacity_info =
|
||||
(max_extended_index >= processor_capacity_info_index) ?
|
||||
cpuid(processor_capacity_info_index) : (struct cpuid_regs) { 0, 0, 0, 0 };
|
||||
|
||||
bool avx_regs = false, avx512_regs = false, mpx_regs = false;
|
||||
/*
|
||||
* OSXSAVE: Operating system enabled XSAVE instructions for application use:
|
||||
* - Intel, AMD: ecx[bit 26] in basic info = XSAVE/XRSTOR instructions supported by a chip.
|
||||
* - Intel, AMD: ecx[bit 27] in basic info = XSAVE/XRSTOR instructions enabled by OS.
|
||||
*/
|
||||
const uint32_t osxsave_mask = UINT32_C(0x0C000000);
|
||||
if ((basic_info.ecx & osxsave_mask) == osxsave_mask) {
|
||||
uint64_t xcr0_valid_bits = 0;
|
||||
if (max_base_index >= 0xD) {
|
||||
const struct cpuid_regs regs = cpuidex(0xD, 0);
|
||||
xcr0_valid_bits = ((uint64_t) regs.edx << 32) | regs.eax;
|
||||
}
|
||||
|
||||
const uint64_t xfeature_enabled_mask = xgetbv(0);
|
||||
|
||||
/*
|
||||
* AVX registers:
|
||||
* - Intel, AMD: XFEATURE_ENABLED_MASK[bit 1] for low 128 bits of ymm registers
|
||||
* - Intel, AMD: XFEATURE_ENABLED_MASK[bit 2] for high 128 bits of ymm registers
|
||||
*/
|
||||
const uint64_t avx_regs_mask = UINT64_C(0x0000000000000006);
|
||||
if ((xcr0_valid_bits & avx_regs_mask) == avx_regs_mask) {
|
||||
avx_regs = (xfeature_enabled_mask & avx_regs_mask) == avx_regs_mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* AVX512 registers:
|
||||
* - Intel, AMD: XFEATURE_ENABLED_MASK[bit 1] for low 128 bits of zmm registers
|
||||
* - Intel, AMD: XFEATURE_ENABLED_MASK[bit 2] for bits 128-255 of zmm registers
|
||||
* - Intel: XFEATURE_ENABLED_MASK[bit 5] for 8 64-bit OpMask registers (k0-k7)
|
||||
* - Intel: XFEATURE_ENABLED_MASK[bit 6] for the high 256 bits of the zmm registers zmm0-zmm15
|
||||
* - Intel: XFEATURE_ENABLED_MASK[bit 7] for the 512-bit zmm registers zmm16-zmm31
|
||||
*/
|
||||
const uint64_t avx512_regs_mask = UINT64_C(0x00000000000000E6);
|
||||
if ((xcr0_valid_bits & avx512_regs_mask) == avx512_regs_mask) {
|
||||
avx512_regs = (xfeature_enabled_mask & avx512_regs_mask) == avx512_regs_mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* MPX registers:
|
||||
* - Intel: XFEATURE_ENABLED_MASK[bit 3] for BNDREGS
|
||||
* - Intel: XFEATURE_ENABLED_MASK[bit 4] for BNDCSR
|
||||
*/
|
||||
const uint64_t mpx_regs_mask = UINT64_C(0x0000000000000018);
|
||||
if ((xcr0_valid_bits & mpx_regs_mask) == mpx_regs_mask) {
|
||||
mpx_regs = (xfeature_enabled_mask & mpx_regs_mask) == mpx_regs_mask;
|
||||
}
|
||||
}
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* RDTSC instruction:
|
||||
* - Intel, AMD: edx[bit 4] in basic info.
|
||||
* - AMD: edx[bit 4] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.rdtsc = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000010));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SYSENTER/SYSEXIT instructions:
|
||||
* - Intel, AMD: edx[bit 11] in basic info.
|
||||
*/
|
||||
isa.sysenter = !!(basic_info.edx & UINT32_C(0x00000800));
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* SYSCALL/SYSRET instructions:
|
||||
* - Intel, AMD: edx[bit 11] in extended info.
|
||||
*/
|
||||
isa.syscall = !!(extended_info.edx & UINT32_C(0x00000800));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* RDMSR/WRMSR instructions:
|
||||
* - Intel, AMD: edx[bit 5] in basic info.
|
||||
* - AMD: edx[bit 5] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.msr = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000020));
|
||||
|
||||
/*
|
||||
* CLZERO instruction:
|
||||
* - AMD: ebx[bit 0] in processor capacity info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.clzero = !!(processor_capacity_info.ebx & UINT32_C(0x00000001));
|
||||
|
||||
/*
|
||||
* CLFLUSH instruction:
|
||||
* - Intel, AMD: edx[bit 19] in basic info.
|
||||
*/
|
||||
isa.clflush = !!(basic_info.edx & UINT32_C(0x00080000));
|
||||
|
||||
/*
|
||||
* CLFLUSHOPT instruction:
|
||||
* - Intel: ebx[bit 23] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000));
|
||||
|
||||
/*
|
||||
* MWAIT/MONITOR instructions:
|
||||
* - Intel, AMD: ecx[bit 3] in basic info.
|
||||
*/
|
||||
isa.mwait = !!(basic_info.ecx & UINT32_C(0x00000008));
|
||||
|
||||
/*
|
||||
* MWAITX/MONITORX instructions:
|
||||
* - AMD: ecx[bit 29] in extended info.
|
||||
*/
|
||||
isa.mwaitx = !!(extended_info.ecx & UINT32_C(0x20000000));
|
||||
|
||||
/*
|
||||
* FXSAVE/FXRSTOR instructions:
|
||||
* - Intel, AMD: edx[bit 24] in basic info.
|
||||
* - AMD: edx[bit 24] in extended info (zero bit on Intel CPUs, EMMX bit on Cyrix CPUs).
|
||||
*/
|
||||
switch (vendor) {
|
||||
#if CPUINFO_ARCH_X86
|
||||
case cpuinfo_vendor_cyrix:
|
||||
case cpuinfo_vendor_nsc:
|
||||
isa.emmx = !!(extended_info.edx & UINT32_C(0x01000000));
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
isa.fxsave = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x01000000));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* XSAVE/XRSTOR instructions:
|
||||
* - Intel, AMD: ecx[bit 26] in basic info.
|
||||
*/
|
||||
isa.xsave = !!(basic_info.ecx & UINT32_C(0x04000000));
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* x87 FPU instructions:
|
||||
* - Intel, AMD: edx[bit 0] in basic info.
|
||||
* - AMD: edx[bit 0] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.fpu = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000001));
|
||||
|
||||
/*
|
||||
* MMX instructions:
|
||||
* - Intel, AMD: edx[bit 23] in basic info.
|
||||
* - AMD: edx[bit 23] in extended info (zero bit on Intel CPUs).
|
||||
*/
|
||||
isa.mmx = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00800000));
|
||||
|
||||
/*
|
||||
* MMX+/Integer SSE instructions:
|
||||
* - Intel, AMD: edx[bit 25] in basic info (SSE feature flag).
|
||||
* - Pre-SSE AMD: edx[bit 22] in extended info (zero bit on Intel CPUs).
|
||||
*/
|
||||
isa.mmx_plus = !!((basic_info.edx & UINT32_C(0x02000000)) | (extended_info.edx & UINT32_C(0x00400000)));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 3dnow! instructions:
|
||||
* - AMD: edx[bit 31] of extended info (zero bit on Intel CPUs).
|
||||
*/
|
||||
isa.three_d_now = !!(extended_info.edx & UINT32_C(0x80000000));
|
||||
|
||||
/*
|
||||
* 3dnow!+ instructions:
|
||||
* - AMD: edx[bit 30] of extended info (zero bit on Intel CPUs).
|
||||
*/
|
||||
isa.three_d_now_plus = !!(extended_info.edx & UINT32_C(0x40000000));
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* 3dnow! Geode instructions:
|
||||
* - No CPUID bit, detect as Geode microarchitecture + 3dnow!+ support
|
||||
*/
|
||||
isa.three_d_now_geode = isa.three_d_now_plus && (uarch == cpuinfo_uarch_geode);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* PREFETCH instruction:
|
||||
* - AMD: ecx[bit 8] of extended info (one of 3dnow! prefetch instructions).
|
||||
* On Intel this bit indicates PREFETCHW, but not PREFETCH support.
|
||||
* - AMD: edx[bit 31] of extended info (implied by 3dnow! support). Reserved bit on Intel CPUs.
|
||||
* - AMD: edx[bit 30] of extended info (implied by 3dnow!+ support). Reserved bit on Intel CPUs.
|
||||
* - AMD: edx[bit 29] of extended info (x86-64 support). Does not imply PREFETCH support on non-AMD CPUs!!!
|
||||
*/
|
||||
switch (vendor) {
|
||||
case cpuinfo_vendor_intel:
|
||||
/*
|
||||
* Instruction is not documented in the manual,
|
||||
* and the 3dnow! prefetch CPUID bit indicates PREFETCHW instruction.
|
||||
*/
|
||||
break;
|
||||
case cpuinfo_vendor_amd:
|
||||
case cpuinfo_vendor_hygon:
|
||||
isa.prefetch = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000)));
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* Conservatively assume, that 3dnow!/3dnow!+ support implies PREFETCH support, but
|
||||
* 3dnow! prefetch CPUID bit follows Intel spec (PREFETCHW, but not PREFETCH).
|
||||
*/
|
||||
isa.prefetch = !!(extended_info.edx & UINT32_C(0xC0000000));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* PREFETCHW instruction:
|
||||
* - AMD: ecx[bit 8] of extended info (one of 3dnow! prefetch instructions).
|
||||
* - Intel: ecx[bit 8] of extended info (PREFETCHW instruction only).
|
||||
* - AMD: edx[bit 31] of extended info (implied by 3dnow! support). Reserved bit on Intel CPUs.
|
||||
* - AMD: edx[bit 30] of extended info (implied by 3dnow!+ support). Reserved bit on Intel CPUs.
|
||||
* - AMD: edx[bit 29] of extended info (x86-64 support). Does not imply PREFETCHW support on non-AMD CPUs!!!
|
||||
*/
|
||||
switch (vendor) {
|
||||
case cpuinfo_vendor_amd:
|
||||
case cpuinfo_vendor_hygon:
|
||||
isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xE0000000)));
|
||||
break;
|
||||
default:
|
||||
/* Assume, that 3dnow!/3dnow!+ support implies PREFETCHW support, not implications from x86-64 support */
|
||||
isa.prefetchw = !!((extended_info.ecx & UINT32_C(0x00000100)) | (extended_info.edx & UINT32_C(0xC0000000)));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* PREFETCHWT1 instruction:
|
||||
* - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD.
|
||||
*/
|
||||
isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001));
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* SSE instructions:
|
||||
* - Intel, AMD: edx[bit 25] in basic info.
|
||||
*/
|
||||
isa.sse = !!(basic_info.edx & UINT32_C(0x02000000));
|
||||
|
||||
/*
|
||||
* SSE2 instructions:
|
||||
* - Intel, AMD: edx[bit 26] in basic info.
|
||||
*/
|
||||
isa.sse2 = !!(basic_info.edx & UINT32_C(0x04000000));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SSE3 instructions:
|
||||
* - Intel, AMD: ecx[bit 0] in basic info.
|
||||
*/
|
||||
isa.sse3 = !!(basic_info.ecx & UINT32_C(0x00000001));
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* CPUs with x86-64 or SSE3 always support DAZ (denormals-as-zero) mode.
|
||||
* Only early Pentium 4 models may not support it.
|
||||
*/
|
||||
if (isa.sse3) {
|
||||
isa.daz = true;
|
||||
} else {
|
||||
/* Detect DAZ support from masked MXCSR bits */
|
||||
if (isa.sse && isa.fxsave) {
|
||||
struct fxsave_region region = { 0 };
|
||||
#ifdef _MSC_VER
|
||||
_fxsave(®ion);
|
||||
#else
|
||||
__asm__ __volatile__ ("fxsave %[region];" : [region] "+m" (region));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Denormals-as-zero (DAZ) flag:
|
||||
* - Intel, AMD: MXCSR[bit 6]
|
||||
*/
|
||||
isa.daz = !!(region.mxcsr_mask & UINT32_C(0x00000040));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* SSSE3 instructions:
|
||||
* - Intel, AMD: ecx[bit 9] in basic info.
|
||||
*/
|
||||
isa.ssse3 = !!(basic_info.ecx & UINT32_C(0x0000200));
|
||||
|
||||
|
||||
/*
|
||||
* SSE4.1 instructions:
|
||||
* - Intel, AMD: ecx[bit 19] in basic info.
|
||||
*/
|
||||
isa.sse4_1 = !!(basic_info.ecx & UINT32_C(0x00080000));
|
||||
|
||||
/*
|
||||
* SSE4.2 instructions:
|
||||
* - Intel: ecx[bit 20] in basic info (reserved bit on AMD CPUs).
|
||||
*/
|
||||
isa.sse4_2 = !!(basic_info.ecx & UINT32_C(0x00100000));
|
||||
|
||||
/*
|
||||
* SSE4A instructions:
|
||||
* - AMD: ecx[bit 6] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.sse4a = !!(extended_info.ecx & UINT32_C(0x00000040));
|
||||
|
||||
/*
|
||||
* Misaligned memory operands in SSE instructions:
|
||||
* - AMD: ecx[bit 7] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.misaligned_sse = !!(extended_info.ecx & UINT32_C(0x00000080));
|
||||
|
||||
/*
|
||||
* AVX instructions:
|
||||
* - Intel, AMD: ecx[bit 28] in basic info.
|
||||
*/
|
||||
isa.avx = avx_regs && !!(basic_info.ecx & UINT32_C(0x10000000));
|
||||
|
||||
/*
|
||||
* FMA3 instructions:
|
||||
* - Intel: ecx[bit 12] in basic info (reserved bit on AMD CPUs).
|
||||
*/
|
||||
isa.fma3 = avx_regs && !!(basic_info.ecx & UINT32_C(0x00001000));
|
||||
|
||||
/*
|
||||
* FMA4 instructions:
|
||||
* - AMD: ecx[bit 16] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.fma4 = avx_regs && !!(extended_info.ecx & UINT32_C(0x00010000));
|
||||
|
||||
/*
|
||||
* XOP instructions:
|
||||
* - AMD: ecx[bit 11] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.xop = avx_regs && !!(extended_info.ecx & UINT32_C(0x00000800));
|
||||
|
||||
/*
|
||||
* F16C instructions:
|
||||
* - Intel, AMD: ecx[bit 29] in basic info.
|
||||
*/
|
||||
isa.f16c = avx_regs && !!(basic_info.ecx & UINT32_C(0x20000000));
|
||||
|
||||
/*
|
||||
* AVX2 instructions:
|
||||
* - Intel: ebx[bit 5] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020));
|
||||
|
||||
/*
|
||||
* AVX512F instructions:
|
||||
* - Intel: ebx[bit 16] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
|
||||
|
||||
/*
|
||||
* AVX512PF instructions:
|
||||
* - Intel: ebx[bit 26] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000));
|
||||
|
||||
/*
|
||||
* AVX512ER instructions:
|
||||
* - Intel: ebx[bit 27] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000));
|
||||
|
||||
/*
|
||||
* AVX512CD instructions:
|
||||
* - Intel: ebx[bit 28] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000));
|
||||
|
||||
/*
|
||||
* AVX512DQ instructions:
|
||||
* - Intel: ebx[bit 17] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000));
|
||||
|
||||
/*
|
||||
* AVX512BW instructions:
|
||||
* - Intel: ebx[bit 30] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000));
|
||||
|
||||
/*
|
||||
* AVX512VL instructions:
|
||||
* - Intel: ebx[bit 31] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000));
|
||||
|
||||
/*
|
||||
* AVX512IFMA instructions:
|
||||
* - Intel: ebx[bit 21] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000));
|
||||
|
||||
/*
|
||||
* AVX512VBMI instructions:
|
||||
* - Intel: ecx[bit 1] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002));
|
||||
|
||||
/*
|
||||
* AVX512VBMI2 instructions:
|
||||
* - Intel: ecx[bit 6] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040));
|
||||
|
||||
/*
|
||||
* AVX512BITALG instructions:
|
||||
* - Intel: ecx[bit 12] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000));
|
||||
|
||||
/*
|
||||
* AVX512VPOPCNTDQ instructions:
|
||||
* - Intel: ecx[bit 14] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000));
|
||||
|
||||
/*
|
||||
* AVX512VNNI instructions:
|
||||
* - Intel: ecx[bit 11] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800));
|
||||
|
||||
/*
|
||||
* AVX512_4VNNIW instructions:
|
||||
* - Intel: edx[bit 2] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004));
|
||||
|
||||
/*
|
||||
* AVX512_4FMAPS instructions:
|
||||
* - Intel: edx[bit 3] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008));
|
||||
|
||||
/*
|
||||
* AVX512_VP2INTERSECT instructions:
|
||||
* - Intel: edx[bit 8] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100));
|
||||
|
||||
/*
|
||||
* AVX512_BF16 instructions:
|
||||
* - Intel: eax[bit 5] in structured feature info (ecx = 1).
|
||||
*/
|
||||
isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
|
||||
|
||||
/*
|
||||
* HLE instructions:
|
||||
* - Intel: ebx[bit 4] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010));
|
||||
|
||||
/*
|
||||
* RTM instructions:
|
||||
* - Intel: ebx[bit 11] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800));
|
||||
|
||||
/*
|
||||
* XTEST instruction:
|
||||
* - Intel: either HLE or RTM is supported
|
||||
*/
|
||||
isa.xtest = isa.hle || isa.rtm;
|
||||
|
||||
/*
|
||||
* MPX registers and instructions:
|
||||
* - Intel: ebx[bit 14] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000));
|
||||
|
||||
#if CPUINFO_ARCH_X86
|
||||
/*
|
||||
* CMOV instructions:
|
||||
* - Intel, AMD: edx[bit 15] in basic info.
|
||||
* - AMD: edx[bit 15] in extended info (zero bit on Intel CPUs).
|
||||
*/
|
||||
isa.cmov = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00008000));
|
||||
|
||||
/*
|
||||
* CMPXCHG8B instruction:
|
||||
* - Intel, AMD: edx[bit 8] in basic info.
|
||||
* - AMD: edx[bit 8] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.cmpxchg8b = !!((basic_info.edx | extended_info.edx) & UINT32_C(0x00000100));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* CMPXCHG16B instruction:
|
||||
* - Intel, AMD: ecx[bit 13] in basic info.
|
||||
*/
|
||||
isa.cmpxchg16b = !!(basic_info.ecx & UINT32_C(0x00002000));
|
||||
|
||||
/*
|
||||
* CLWB instruction:
|
||||
* - Intel: ebx[bit 24] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000));
|
||||
|
||||
/*
|
||||
* MOVBE instruction:
|
||||
* - Intel: ecx[bit 22] in basic info.
|
||||
*/
|
||||
isa.movbe = !!(basic_info.ecx & UINT32_C(0x00400000));
|
||||
|
||||
#if CPUINFO_ARCH_X86_64
|
||||
/*
|
||||
* Some early x86-64 CPUs lack LAHF & SAHF instructions.
|
||||
* A special CPU feature bit must be checked to ensure their availability:
|
||||
* - Intel, AMD: ecx[bit 0] in extended info.
|
||||
*/
|
||||
isa.lahf_sahf = !!(extended_info.ecx & UINT32_C(0x00000001));
|
||||
#endif
|
||||
|
||||
/*
|
||||
* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions.
|
||||
* - Intel: ebx[bit 0] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001));
|
||||
|
||||
/*
|
||||
* LZCNT instruction:
|
||||
* - Intel, AMD: ecx[bit 5] in extended info.
|
||||
*/
|
||||
isa.lzcnt = !!(extended_info.ecx & UINT32_C(0x00000020));
|
||||
|
||||
/*
|
||||
* POPCNT instruction:
|
||||
* - Intel, AMD: ecx[bit 23] in basic info.
|
||||
*/
|
||||
isa.popcnt = !!(basic_info.ecx & UINT32_C(0x00800000));
|
||||
|
||||
/*
|
||||
* TBM instructions:
|
||||
* - AMD: ecx[bit 21] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.tbm = !!(extended_info.ecx & UINT32_C(0x00200000));
|
||||
|
||||
/*
|
||||
* BMI instructions:
|
||||
* - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008));
|
||||
|
||||
/*
|
||||
* BMI2 instructions:
|
||||
* - Intel: ebx[bit 8] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100));
|
||||
|
||||
/*
|
||||
* ADCX/ADOX instructions:
|
||||
* - Intel: ebx[bit 19] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000));
|
||||
|
||||
/*
|
||||
* AES instructions:
|
||||
* - Intel: ecx[bit 25] in basic info (reserved bit on AMD CPUs).
|
||||
*/
|
||||
isa.aes = !!(basic_info.ecx & UINT32_C(0x02000000));
|
||||
|
||||
/*
|
||||
* VAES instructions:
|
||||
* - Intel: ecx[bit 9] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200));
|
||||
|
||||
/*
|
||||
* PCLMULQDQ instruction:
|
||||
* - Intel: ecx[bit 1] in basic info (reserved bit on AMD CPUs).
|
||||
*/
|
||||
isa.pclmulqdq = !!(basic_info.ecx & UINT32_C(0x00000002));
|
||||
|
||||
/*
|
||||
* VPCLMULQDQ instruction:
|
||||
* - Intel: ecx[bit 10] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400));
|
||||
|
||||
/*
|
||||
* GFNI instructions:
|
||||
* - Intel: ecx[bit 8] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100));
|
||||
|
||||
/*
|
||||
* RDRAND instruction:
|
||||
* - Intel: ecx[bit 30] in basic info (reserved bit on AMD CPUs).
|
||||
*/
|
||||
isa.rdrand = !!(basic_info.ecx & UINT32_C(0x40000000));
|
||||
|
||||
/*
|
||||
* RDSEED instruction:
|
||||
* - Intel: ebx[bit 18] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000));
|
||||
|
||||
/*
|
||||
* SHA instructions:
|
||||
* - Intel: ebx[bit 29] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000));
|
||||
|
||||
if (vendor == cpuinfo_vendor_via) {
|
||||
const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000));
|
||||
const uint32_t max_padlock_index = padlock_meta_info.eax;
|
||||
const uint32_t padlock_info_index = UINT32_C(0xC0000001);
|
||||
if (max_padlock_index >= padlock_info_index) {
|
||||
const struct cpuid_regs padlock_info = cpuid(padlock_info_index);
|
||||
|
||||
/*
|
||||
* Padlock RNG extension:
|
||||
* - VIA: edx[bit 2] in padlock info = RNG exists on chip flag.
|
||||
* - VIA: edx[bit 3] in padlock info = RNG enabled by OS.
|
||||
*/
|
||||
const uint32_t padlock_rng_mask = UINT32_C(0x0000000C);
|
||||
isa.rng = (padlock_info.edx & padlock_rng_mask) == padlock_rng_mask;
|
||||
|
||||
/*
|
||||
* Padlock ACE extension:
|
||||
* - VIA: edx[bit 6] in padlock info = ACE exists on chip flag.
|
||||
* - VIA: edx[bit 7] in padlock info = ACE enabled by OS.
|
||||
*/
|
||||
const uint32_t padlock_ace_mask = UINT32_C(0x000000C0);
|
||||
isa.ace = (padlock_info.edx & padlock_ace_mask) == padlock_ace_mask;
|
||||
|
||||
/*
|
||||
* Padlock ACE 2 extension:
|
||||
* - VIA: edx[bit 8] in padlock info = ACE2 exists on chip flag.
|
||||
* - VIA: edx[bit 9] in padlock info = ACE 2 enabled by OS.
|
||||
*/
|
||||
const uint32_t padlock_ace2_mask = UINT32_C(0x00000300);
|
||||
isa.ace2 = (padlock_info.edx & padlock_ace2_mask) == padlock_ace2_mask;
|
||||
|
||||
/*
|
||||
* Padlock PHE extension:
|
||||
* - VIA: edx[bit 10] in padlock info = PHE exists on chip flag.
|
||||
* - VIA: edx[bit 11] in padlock info = PHE enabled by OS.
|
||||
*/
|
||||
const uint32_t padlock_phe_mask = UINT32_C(0x00000C00);
|
||||
isa.phe = (padlock_info.edx & padlock_phe_mask) == padlock_phe_mask;
|
||||
|
||||
/*
|
||||
* Padlock PMM extension:
|
||||
* - VIA: edx[bit 12] in padlock info = PMM exists on chip flag.
|
||||
* - VIA: edx[bit 13] in padlock info = PMM enabled by OS.
|
||||
*/
|
||||
const uint32_t padlock_pmm_mask = UINT32_C(0x00003000);
|
||||
isa.pmm = (padlock_info.edx & padlock_pmm_mask) == padlock_pmm_mask;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* LWP instructions:
|
||||
* - AMD: ecx[bit 15] in extended info (reserved bit on Intel CPUs).
|
||||
*/
|
||||
isa.lwp = !!(extended_info.ecx & UINT32_C(0x00008000));
|
||||
|
||||
/*
|
||||
* RDTSCP instruction:
|
||||
* - Intel, AMD: edx[bit 27] in extended info.
|
||||
*/
|
||||
isa.rdtscp = !!(extended_info.edx & UINT32_C(0x08000000));
|
||||
|
||||
/*
|
||||
* RDPID instruction:
|
||||
* - Intel: ecx[bit 22] in structured feature info (ecx = 0).
|
||||
*/
|
||||
isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000));
|
||||
|
||||
return isa;
|
||||
}
|
20
dep/cpuinfo/src/x86/linux/api.h
Normal file
20
dep/cpuinfo/src/x86/linux/api.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <cpuinfo/common.h>
|
||||
#include <x86/api.h>
|
||||
#include <linux/api.h>
|
||||
|
||||
|
||||
/*
 * Per-logical-processor record assembled from x86 Linux system sources
 * (notably /proc/cpuinfo).
 */
struct cpuinfo_x86_linux_processor {
	/* x86 APIC ID parsed from the "apicid" line of /proc/cpuinfo.
	 * Valid only when CPUINFO_LINUX_FLAG_APIC_ID is set in flags. */
	uint32_t apic_id;
	/* Linux logical CPU number. NOTE(review): not assigned by the visible
	 * /proc/cpuinfo parser — presumably filled in elsewhere during init;
	 * confirm against the caller. */
	uint32_t linux_id;
	/* Bitmask of CPUINFO_LINUX_FLAG_* bits recording which fields were
	 * successfully parsed / which system lists mention this processor. */
	uint32_t flags;
};

/*
 * Parses /proc/cpuinfo, recording APIC IDs and presence flags for up to
 * max_processors_count logical processors.
 *
 * @param max_processors_count  capacity of the processors array; entries with
 *                              higher indices in /proc/cpuinfo are ignored.
 * @param processors            output array of per-processor records.
 * @return true on success, false if /proc/cpuinfo could not be processed.
 */
CPUINFO_INTERNAL bool cpuinfo_x86_linux_parse_proc_cpuinfo(
	uint32_t max_processors_count,
	struct cpuinfo_x86_linux_processor processors[restrict static max_processors_count]);
|
207
dep/cpuinfo/src/x86/linux/cpuinfo.c
Normal file
207
dep/cpuinfo/src/x86/linux/cpuinfo.c
Normal file
@ -0,0 +1,207 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <linux/api.h>
|
||||
#include <x86/linux/api.h>
|
||||
#include <cpuinfo/log.h>
|
||||
|
||||
/*
|
||||
* Size, in chars, of the on-stack buffer used for parsing lines of /proc/cpuinfo.
|
||||
* This is also the limit on the length of a single line.
|
||||
*/
|
||||
#define BUFFER_SIZE 2048
|
||||
|
||||
|
||||
/*
 * Parses a decimal processor number from the value part of a
 * "processor : N" line in /proc/cpuinfo.
 *
 * Parsing stops at the first non-decimal character; the digits consumed so
 * far are kept (a warning is logged for the ignored suffix). An empty string
 * yields 0 with a warning.
 *
 * @param processor_start  pointer to the first character of the value.
 * @param processor_end    pointer one past the last character of the value.
 * @return the parsed processor number (0 on empty input).
 */
static uint32_t parse_processor_number(
	const char* processor_start,
	const char* processor_end)
{
	const size_t processor_length = (size_t) (processor_end - processor_start);

	if (processor_length == 0) {
		cpuinfo_log_warning("Processor number in /proc/cpuinfo is ignored: string is empty");
		return 0;
	}

	uint32_t processor_number = 0;
	for (const char* digit_ptr = processor_start; digit_ptr != processor_end; digit_ptr++) {
		/* Characters below '0' wrap to large values, so a single unsigned
		 * comparison rejects everything outside '0'..'9'. */
		const uint32_t digit = (uint32_t) (*digit_ptr - '0');
		/* Was `digit > 10`, which wrongly accepted value 10 (the character
		 * ':') as a digit; `>= 10` matches the check in parse_apic_id. */
		if (digit >= 10) {
			cpuinfo_log_warning("non-decimal suffix %.*s in /proc/cpuinfo processor number is ignored",
				(int) (processor_end - digit_ptr), digit_ptr);
			break;
		}

		processor_number = processor_number * 10 + digit;
	}

	return processor_number;
}
|
||||
|
||||
/*
|
||||
* Decode APIC ID reported by Linux kernel for x86/x86-64 architecture.
|
||||
* Example of APIC ID reported in /proc/cpuinfo:
|
||||
*
|
||||
* apicid : 2
|
||||
*/
|
||||
static void parse_apic_id(
|
||||
const char* apic_start,
|
||||
const char* apic_end,
|
||||
struct cpuinfo_x86_linux_processor processor[restrict static 1])
|
||||
{
|
||||
uint32_t apic_id = 0;
|
||||
for (const char* digit_ptr = apic_start; digit_ptr != apic_end; digit_ptr++) {
|
||||
const uint32_t digit = *digit_ptr - '0';
|
||||
if (digit >= 10) {
|
||||
cpuinfo_log_warning("APIC ID %.*s in /proc/cpuinfo is ignored due to unexpected non-digit character '%c' at offset %zu",
|
||||
(int) (apic_end - apic_start), apic_start,
|
||||
*digit_ptr, (size_t) (digit_ptr - apic_start));
|
||||
return;
|
||||
}
|
||||
|
||||
apic_id = apic_id * 10 + digit;
|
||||
}
|
||||
|
||||
processor->apic_id = apic_id;
|
||||
processor->flags |= CPUINFO_LINUX_FLAG_APIC_ID;
|
||||
}
|
||||
|
||||
/*
 * Mutable state threaded through parse_line() while iterating over the lines
 * of /proc/cpuinfo.
 */
struct proc_cpuinfo_parser_state {
	/* Index of the processor currently being described (updated each time a
	 * "processor : N" line is seen). */
	uint32_t processor_index;
	/* Capacity of the processors array; entries at or above this index are
	 * redirected to dummy_processor. */
	uint32_t max_processors_count;
	/* Output array of per-processor records being filled in. */
	struct cpuinfo_x86_linux_processor* processors;
	/* Scratch record that absorbs writes for out-of-range processor indices,
	 * so parse_line() never writes past the processors array. */
	struct cpuinfo_x86_linux_processor dummy_processor;
};
|
||||
|
||||
/*
|
||||
* Decode a single line of /proc/cpuinfo information.
|
||||
* Lines have format <words-with-spaces>[ ]*:[ ]<space-separated words>
|
||||
*/
|
||||
/*
 * Parses one "key : value" line of /proc/cpuinfo.
 *
 * Recognized keys: "apicid" (parsed into the current processor's record) and
 * "processor" (advances state->processor_index and marks the new processor as
 * seen). All other keys, malformed lines, and empty lines are logged at
 * info/debug level and skipped.
 *
 * @param line_start   pointer to the first character of the line.
 * @param line_end     pointer one past the last character of the line.
 * @param state        parser state shared across lines.
 * @param line_number  1-based line number; unused here, but required by the
 *                     cpuinfo_line_callback signature (see the cast at the
 *                     call site in cpuinfo_x86_linux_parse_proc_cpuinfo).
 * @return true always, so iteration continues to the end of the file.
 *
 * NOTE(review): the PRIu32 macros below require <inttypes.h>, which this file
 * does not include directly — presumably pulled in transitively via
 * cpuinfo/log.h; confirm.
 */
static bool parse_line(
	const char* line_start,
	const char* line_end,
	struct proc_cpuinfo_parser_state state[restrict static 1],
	uint64_t line_number)
{
	/* Empty line. Skip. */
	if (line_start == line_end) {
		return true;
	}

	/* Search for ':' on the line. */
	const char* separator = line_start;
	for (; separator != line_end; separator++) {
		if (*separator == ':') {
			break;
		}
	}
	/* Skip line if no ':' separator was found. */
	if (separator == line_end) {
		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key/value separator ':' not found",
			(int) (line_end - line_start), line_start);
		return true;
	}

	/* Skip trailing spaces in key part. */
	const char* key_end = separator;
	for (; key_end != line_start; key_end--) {
		if (key_end[-1] != ' ' && key_end[-1] != '\t') {
			break;
		}
	}
	/* Skip line if key contains nothing but spaces. */
	if (key_end == line_start) {
		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: key contains only spaces",
			(int) (line_end - line_start), line_start);
		return true;
	}

	/* Skip leading spaces in value part. */
	const char* value_start = separator + 1;
	for (; value_start != line_end; value_start++) {
		if (*value_start != ' ') {
			break;
		}
	}
	/* Value part contains nothing but spaces. Skip line. */
	if (value_start == line_end) {
		cpuinfo_log_info("Line %.*s in /proc/cpuinfo is ignored: value contains only spaces",
			(int) (line_end - line_start), line_start);
		return true;
	}

	/* Skip trailing spaces in value part (if any) */
	const char* value_end = line_end;
	for (; value_end != value_start; value_end--) {
		if (value_end[-1] != ' ') {
			break;
		}
	}

	/* Out-of-range processor indices write into dummy_processor instead of
	 * past the end of the processors array. */
	const uint32_t processor_index = state->processor_index;
	const uint32_t max_processors_count = state->max_processors_count;
	struct cpuinfo_x86_linux_processor* processors = state->processors;
	struct cpuinfo_x86_linux_processor* processor = &state->dummy_processor;
	if (processor_index < max_processors_count) {
		processor = &processors[processor_index];
	}

	/* Dispatch on key length first to avoid memcmp on keys that cannot match. */
	const size_t key_length = key_end - line_start;
	switch (key_length) {
		case 6:
			if (memcmp(line_start, "apicid", key_length) == 0) {
				parse_apic_id(value_start, value_end, processor);
			} else {
				goto unknown;
			}
			break;
		case 9:
			if (memcmp(line_start, "processor", key_length) == 0) {
				const uint32_t new_processor_index = parse_processor_number(value_start, value_end);
				if (new_processor_index < processor_index) {
					/* Strange: decreasing processor number */
					cpuinfo_log_warning(
						"unexpectedly low processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo",
						new_processor_index, processor_index);
				} else if (new_processor_index > processor_index + 1) {
					/* Strange, but common: skipped processor $(processor_index + 1) */
					cpuinfo_log_info(
						"unexpectedly high processor number %"PRIu32" following processor %"PRIu32" in /proc/cpuinfo",
						new_processor_index, processor_index);
				}
				if (new_processor_index >= max_processors_count) {
					/* Log and ignore processor */
					cpuinfo_log_warning("processor %"PRIu32" in /proc/cpuinfo is ignored: index exceeds system limit %"PRIu32,
						new_processor_index, max_processors_count - 1);
				} else {
					processors[new_processor_index].flags |= CPUINFO_LINUX_FLAG_PROC_CPUINFO;
				}
				/* Subsequent key/value lines describe this processor. */
				state->processor_index = new_processor_index;
				return true;
			} else {
				goto unknown;
			}
			break; /* unreachable: both branches above return or goto */
		default:
		unknown:
			cpuinfo_log_debug("unknown /proc/cpuinfo key: %.*s", (int) key_length, line_start);

	}
	return true;
}
|
||||
|
||||
bool cpuinfo_x86_linux_parse_proc_cpuinfo(
|
||||
uint32_t max_processors_count,
|
||||
struct cpuinfo_x86_linux_processor processors[restrict static max_processors_count])
|
||||
{
|
||||
struct proc_cpuinfo_parser_state state = {
|
||||
.processor_index = 0,
|
||||
.max_processors_count = max_processors_count,
|
||||
.processors = processors,
|
||||
};
|
||||
return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE,
|
||||
(cpuinfo_line_callback) parse_line, &state);
|
||||
}
|
629
dep/cpuinfo/src/x86/linux/init.c
Normal file
629
dep/cpuinfo/src/x86/linux/init.c
Normal file
@ -0,0 +1,629 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
#include <x86/linux/api.h>
|
||||
#include <linux/api.h>
|
||||
#include <cpuinfo/internal-api.h>
|
||||
#include <cpuinfo/log.h>
|
||||
|
||||
|
||||
/*
 * Returns a mask with the lowest `bits` bits set (e.g. bit_mask(4) == 0xF).
 *
 * Guards against `bits >= 32`: shifting a 32-bit value by 32 or more is
 * undefined behavior in C (C11 6.5.7), so saturate to an all-ones mask.
 */
static inline uint32_t bit_mask(uint32_t bits) {
	if (bits >= 32) {
		return UINT32_MAX;
	}
	return (UINT32_C(1) << bits) - UINT32_C(1);
}
|
||||
|
||||
/* Returns true if every bit set in `mask` is also set in `bitfield`. */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
	/* Equivalent to (bitfield & mask) == mask: no mask bit may be missing. */
	return (mask & ~bitfield) == 0;
}
|
||||
|
||||
/* Returns the smaller of two unsigned 32-bit values. */
static inline uint32_t min(uint32_t a, uint32_t b) {
	if (b < a) {
		return b;
	}
	return a;
}
|
||||
|
||||
/* Three-way comparison of unsigned 32-bit values: -1, 0, or +1. */
static inline int cmp(uint32_t a, uint32_t b) {
	if (a < b) {
		return -1;
	}
	if (a > b) {
		return 1;
	}
	return 0;
}
|
||||
|
||||
static int cmp_x86_linux_processor(const void* ptr_a, const void* ptr_b) {
|
||||
const struct cpuinfo_x86_linux_processor* processor_a = (const struct cpuinfo_x86_linux_processor*) ptr_a;
|
||||
const struct cpuinfo_x86_linux_processor* processor_b = (const struct cpuinfo_x86_linux_processor*) ptr_b;
|
||||
|
||||
/* Move usable processors towards the start of the array */
|
||||
const bool usable_a = bitmask_all(processor_a->flags, CPUINFO_LINUX_FLAG_VALID);
|
||||
const bool usable_b = bitmask_all(processor_b->flags, CPUINFO_LINUX_FLAG_VALID);
|
||||
if (usable_a != usable_b) {
|
||||
return (int) usable_b - (int) usable_a;
|
||||
}
|
||||
|
||||
/* Compare based on APIC ID (i.e. processor 0 < processor 1) */
|
||||
const uint32_t id_a = processor_a->apic_id;
|
||||
const uint32_t id_b = processor_b->apic_id;
|
||||
return cmp(id_a, id_b);
|
||||
}
|
||||
|
||||
/*
 * Counts distinct cores, clusters, packages, and cache instances among the
 * valid Linux processors by masking off the per-level bits of each APIC ID
 * and counting transitions of the resulting IDs.
 *
 * NOTE(review): counting by comparing against the previous ID assumes the
 * linux_processors array is ordered so that processors with equal derived IDs
 * are adjacent — presumably guaranteed by the APIC-ID sort via
 * cmp_x86_linux_processor; confirm at the call site.
 *
 * @param linux_processors_count  number of entries in linux_processors.
 * @param linux_processors        per-processor records with APIC IDs.
 * @param processor               decoded topology/cache layout (APIC bit
 *                                offsets and lengths) shared by all processors.
 * @param valid_processor_mask    flags a processor must have to be counted.
 * @param llc_apic_bits           number of low APIC ID bits shared by
 *                                processors behind one last-level cache.
 * @param cores_count_ptr ... l4_count_ptr  outputs; caches with size 0 in
 *                                `processor` produce a count of 0.
 */
static void cpuinfo_x86_count_objects(
	uint32_t linux_processors_count,
	const struct cpuinfo_x86_linux_processor linux_processors[restrict static linux_processors_count],
	const struct cpuinfo_x86_processor processor[restrict static 1],
	uint32_t valid_processor_mask,
	uint32_t llc_apic_bits,
	uint32_t cores_count_ptr[restrict static 1],
	uint32_t clusters_count_ptr[restrict static 1],
	uint32_t packages_count_ptr[restrict static 1],
	uint32_t l1i_count_ptr[restrict static 1],
	uint32_t l1d_count_ptr[restrict static 1],
	uint32_t l2_count_ptr[restrict static 1],
	uint32_t l3_count_ptr[restrict static 1],
	uint32_t l4_count_ptr[restrict static 1])
{
	/* Masks that clear the thread-ID (core), thread+core-ID (package), and
	 * LLC-shared bits of an APIC ID, leaving the identifier of the
	 * corresponding object. */
	const uint32_t core_apic_mask =
		~(bit_mask(processor->topology.thread_bits_length) << processor->topology.thread_bits_offset);
	const uint32_t package_apic_mask =
		core_apic_mask & ~(bit_mask(processor->topology.core_bits_length) << processor->topology.core_bits_offset);
	const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits);
	const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask;

	/* Sentinel UINT32_MAX marks "no previous ID seen yet". */
	uint32_t cores_count = 0, clusters_count = 0, packages_count = 0;
	uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
	uint32_t last_core_id = UINT32_MAX, last_cluster_id = UINT32_MAX, last_package_id = UINT32_MAX;
	uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
	uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
	for (uint32_t i = 0; i < linux_processors_count; i++) {
		if (bitmask_all(linux_processors[i].flags, valid_processor_mask)) {
			const uint32_t apic_id = linux_processors[i].apic_id;
			cpuinfo_log_debug("APID ID %"PRIu32": system processor %"PRIu32, apic_id, linux_processors[i].linux_id);

			/* All bits of APIC ID except thread ID mask */
			const uint32_t core_id = apic_id & core_apic_mask;
			if (core_id != last_core_id) {
				last_core_id = core_id;
				cores_count++;
			}
			/* All bits of APIC ID except thread ID and core ID masks */
			const uint32_t package_id = apic_id & package_apic_mask;
			if (package_id != last_package_id) {
				last_package_id = package_id;
				packages_count++;
			}
			/* Bits of APIC ID which are part of either LLC or package ID mask */
			const uint32_t cluster_id = apic_id & cluster_apic_mask;
			if (cluster_id != last_cluster_id) {
				last_cluster_id = cluster_id;
				clusters_count++;
			}
			/* Each cache level is counted only if the processor reports a
			 * non-zero cache of that level. */
			if (processor->cache.l1i.size != 0) {
				const uint32_t l1i_id = apic_id & ~bit_mask(processor->cache.l1i.apic_bits);
				if (l1i_id != last_l1i_id) {
					last_l1i_id = l1i_id;
					l1i_count++;
				}
			}
			if (processor->cache.l1d.size != 0) {
				const uint32_t l1d_id = apic_id & ~bit_mask(processor->cache.l1d.apic_bits);
				if (l1d_id != last_l1d_id) {
					last_l1d_id = l1d_id;
					l1d_count++;
				}
			}
			if (processor->cache.l2.size != 0) {
				const uint32_t l2_id = apic_id & ~bit_mask(processor->cache.l2.apic_bits);
				if (l2_id != last_l2_id) {
					last_l2_id = l2_id;
					l2_count++;
				}
			}
			if (processor->cache.l3.size != 0) {
				const uint32_t l3_id = apic_id & ~bit_mask(processor->cache.l3.apic_bits);
				if (l3_id != last_l3_id) {
					last_l3_id = l3_id;
					l3_count++;
				}
			}
			if (processor->cache.l4.size != 0) {
				const uint32_t l4_id = apic_id & ~bit_mask(processor->cache.l4.apic_bits);
				if (l4_id != last_l4_id) {
					last_l4_id = l4_id;
					l4_count++;
				}
			}
		}
	}
	*cores_count_ptr = cores_count;
	*clusters_count_ptr = clusters_count;
	*packages_count_ptr = packages_count;
	*l1i_count_ptr = l1i_count;
	*l1d_count_ptr = l1d_count;
	*l2_count_ptr = l2_count;
	*l3_count_ptr = l3_count;
	*l4_count_ptr = l4_count;
}
|
||||
|
||||
void cpuinfo_x86_linux_init(void) {
|
||||
struct cpuinfo_x86_linux_processor* x86_linux_processors = NULL;
|
||||
struct cpuinfo_processor* processors = NULL;
|
||||
struct cpuinfo_core* cores = NULL;
|
||||
struct cpuinfo_cluster* clusters = NULL;
|
||||
struct cpuinfo_package* packages = NULL;
|
||||
const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
|
||||
const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
|
||||
struct cpuinfo_cache* l1i = NULL;
|
||||
struct cpuinfo_cache* l1d = NULL;
|
||||
struct cpuinfo_cache* l2 = NULL;
|
||||
struct cpuinfo_cache* l3 = NULL;
|
||||
struct cpuinfo_cache* l4 = NULL;
|
||||
|
||||
const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
|
||||
cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count);
|
||||
|
||||
const uint32_t max_possible_processors_count = 1 +
|
||||
cpuinfo_linux_get_max_possible_processor(max_processors_count);
|
||||
cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count);
|
||||
const uint32_t max_present_processors_count = 1 +
|
||||
cpuinfo_linux_get_max_present_processor(max_processors_count);
|
||||
cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count);
|
||||
|
||||
uint32_t valid_processor_mask = 0;
|
||||
uint32_t x86_linux_processors_count = max_processors_count;
|
||||
if (max_present_processors_count != 0) {
|
||||
x86_linux_processors_count = min(x86_linux_processors_count, max_present_processors_count);
|
||||
valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT;
|
||||
} else {
|
||||
valid_processor_mask = CPUINFO_LINUX_FLAG_PROC_CPUINFO;
|
||||
}
|
||||
if (max_possible_processors_count != 0) {
|
||||
x86_linux_processors_count = min(x86_linux_processors_count, max_possible_processors_count);
|
||||
valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE;
|
||||
}
|
||||
|
||||
x86_linux_processors = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor));
|
||||
if (x86_linux_processors == NULL) {
|
||||
cpuinfo_log_error(
|
||||
"failed to allocate %zu bytes for descriptions of %"PRIu32" x86 logical processors",
|
||||
x86_linux_processors_count * sizeof(struct cpuinfo_x86_linux_processor),
|
||||
x86_linux_processors_count);
|
||||
return;
|
||||
}
|
||||
|
||||
if (max_possible_processors_count != 0) {
|
||||
cpuinfo_linux_detect_possible_processors(
|
||||
x86_linux_processors_count, &x86_linux_processors->flags,
|
||||
sizeof(struct cpuinfo_x86_linux_processor),
|
||||
CPUINFO_LINUX_FLAG_POSSIBLE);
|
||||
}
|
||||
|
||||
if (max_present_processors_count != 0) {
|
||||
cpuinfo_linux_detect_present_processors(
|
||||
x86_linux_processors_count, &x86_linux_processors->flags,
|
||||
sizeof(struct cpuinfo_x86_linux_processor),
|
||||
CPUINFO_LINUX_FLAG_PRESENT);
|
||||
}
|
||||
|
||||
if (!cpuinfo_x86_linux_parse_proc_cpuinfo(x86_linux_processors_count, x86_linux_processors)) {
|
||||
cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo");
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
|
||||
if (bitmask_all(x86_linux_processors[i].flags, valid_processor_mask)) {
|
||||
x86_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
|
||||
}
|
||||
}
|
||||
|
||||
struct cpuinfo_x86_processor x86_processor;
|
||||
memset(&x86_processor, 0, sizeof(x86_processor));
|
||||
cpuinfo_x86_init_processor(&x86_processor);
|
||||
char brand_string[48];
|
||||
cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
|
||||
|
||||
uint32_t processors_count = 0;
|
||||
for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
|
||||
if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
|
||||
x86_linux_processors[i].linux_id = i;
|
||||
processors_count++;
|
||||
}
|
||||
}
|
||||
|
||||
qsort(x86_linux_processors, x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor),
|
||||
cmp_x86_linux_processor);
|
||||
|
||||
processors = calloc(processors_count, sizeof(struct cpuinfo_processor));
|
||||
if (processors == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
|
||||
processors_count * sizeof(struct cpuinfo_processor), processors_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
uint32_t llc_apic_bits = 0;
|
||||
if (x86_processor.cache.l4.size != 0) {
|
||||
llc_apic_bits = x86_processor.cache.l4.apic_bits;
|
||||
} else if (x86_processor.cache.l3.size != 0) {
|
||||
llc_apic_bits = x86_processor.cache.l3.apic_bits;
|
||||
} else if (x86_processor.cache.l2.size != 0) {
|
||||
llc_apic_bits = x86_processor.cache.l2.apic_bits;
|
||||
} else if (x86_processor.cache.l1d.size != 0) {
|
||||
llc_apic_bits = x86_processor.cache.l1d.apic_bits;
|
||||
}
|
||||
uint32_t packages_count = 0, clusters_count = 0, cores_count = 0;
|
||||
uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
|
||||
cpuinfo_x86_count_objects(
|
||||
x86_linux_processors_count, x86_linux_processors, &x86_processor, valid_processor_mask, llc_apic_bits,
|
||||
&cores_count, &clusters_count, &packages_count, &l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count);
|
||||
|
||||
cpuinfo_log_debug("detected %"PRIu32" cores", cores_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" clusters", clusters_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" packages", packages_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" L1I caches", l1i_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" L1D caches", l1d_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
|
||||
cpuinfo_log_debug("detected %"PRIu32" L4 caches", l4_count);
|
||||
|
||||
linux_cpu_to_processor_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_processor*));
|
||||
if (linux_cpu_to_processor_map == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for mapping entries of %"PRIu32" logical processors",
|
||||
x86_linux_processors_count * sizeof(struct cpuinfo_processor*),
|
||||
x86_linux_processors_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
linux_cpu_to_core_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_core*));
|
||||
if (linux_cpu_to_core_map == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for mapping entries of %"PRIu32" cores",
|
||||
x86_linux_processors_count * sizeof(struct cpuinfo_core*),
|
||||
x86_linux_processors_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cores = calloc(cores_count, sizeof(struct cpuinfo_core));
|
||||
if (cores == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
|
||||
cores_count * sizeof(struct cpuinfo_core), cores_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
clusters = calloc(clusters_count, sizeof(struct cpuinfo_cluster));
|
||||
if (clusters == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
|
||||
clusters_count * sizeof(struct cpuinfo_cluster), clusters_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
packages = calloc(packages_count, sizeof(struct cpuinfo_package));
|
||||
if (packages == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
|
||||
packages_count * sizeof(struct cpuinfo_package), packages_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (l1i_count != 0) {
|
||||
l1i = calloc(l1i_count, sizeof(struct cpuinfo_cache));
|
||||
if (l1i == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
|
||||
l1i_count * sizeof(struct cpuinfo_cache), l1i_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l1d_count != 0) {
|
||||
l1d = calloc(l1d_count, sizeof(struct cpuinfo_cache));
|
||||
if (l1d == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
|
||||
l1d_count * sizeof(struct cpuinfo_cache), l1d_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l2_count != 0) {
|
||||
l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
|
||||
if (l2 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
|
||||
l2_count * sizeof(struct cpuinfo_cache), l2_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l3_count != 0) {
|
||||
l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
|
||||
if (l3 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
|
||||
l3_count * sizeof(struct cpuinfo_cache), l3_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l4_count != 0) {
|
||||
l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
|
||||
if (l4 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches",
|
||||
l4_count * sizeof(struct cpuinfo_cache), l4_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t core_apic_mask =
|
||||
~(bit_mask(x86_processor.topology.thread_bits_length) << x86_processor.topology.thread_bits_offset);
|
||||
const uint32_t package_apic_mask =
|
||||
core_apic_mask & ~(bit_mask(x86_processor.topology.core_bits_length) << x86_processor.topology.core_bits_offset);
|
||||
const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits);
|
||||
const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask;
|
||||
|
||||
uint32_t processor_index = UINT32_MAX, core_index = UINT32_MAX, cluster_index = UINT32_MAX, package_index = UINT32_MAX;
|
||||
uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX, l4_index = UINT32_MAX;
|
||||
uint32_t cluster_id = 0, core_id = 0, smt_id = 0;
|
||||
uint32_t last_apic_core_id = UINT32_MAX, last_apic_cluster_id = UINT32_MAX, last_apic_package_id = UINT32_MAX;
|
||||
uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
|
||||
uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
|
||||
if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
|
||||
const uint32_t apic_id = x86_linux_processors[i].apic_id;
|
||||
processor_index++;
|
||||
smt_id++;
|
||||
|
||||
/* All bits of APIC ID except thread ID mask */
|
||||
const uint32_t apid_core_id = apic_id & core_apic_mask;
|
||||
if (apid_core_id != last_apic_core_id) {
|
||||
core_index++;
|
||||
core_id++;
|
||||
smt_id = 0;
|
||||
}
|
||||
/* Bits of APIC ID which are part of either LLC or package ID mask */
|
||||
const uint32_t apic_cluster_id = apic_id & cluster_apic_mask;
|
||||
if (apic_cluster_id != last_apic_cluster_id) {
|
||||
cluster_index++;
|
||||
cluster_id++;
|
||||
}
|
||||
/* All bits of APIC ID except thread ID and core ID masks */
|
||||
const uint32_t apic_package_id = apic_id & package_apic_mask;
|
||||
if (apic_package_id != last_apic_package_id) {
|
||||
package_index++;
|
||||
core_id = 0;
|
||||
cluster_id = 0;
|
||||
}
|
||||
|
||||
/* Initialize logical processor object */
|
||||
processors[processor_index].smt_id = smt_id;
|
||||
processors[processor_index].core = cores + core_index;
|
||||
processors[processor_index].cluster = clusters + cluster_index;
|
||||
processors[processor_index].package = packages + package_index;
|
||||
processors[processor_index].linux_id = x86_linux_processors[i].linux_id;
|
||||
processors[processor_index].apic_id = x86_linux_processors[i].apic_id;
|
||||
|
||||
if (apid_core_id != last_apic_core_id) {
|
||||
/* new core */
|
||||
cores[core_index] = (struct cpuinfo_core) {
|
||||
.processor_start = processor_index,
|
||||
.processor_count = 1,
|
||||
.core_id = core_id,
|
||||
.cluster = clusters + cluster_index,
|
||||
.package = packages + package_index,
|
||||
.vendor = x86_processor.vendor,
|
||||
.uarch = x86_processor.uarch,
|
||||
.cpuid = x86_processor.cpuid,
|
||||
};
|
||||
clusters[cluster_index].core_count += 1;
|
||||
packages[package_index].core_count += 1;
|
||||
last_apic_core_id = apid_core_id;
|
||||
} else {
|
||||
/* another logical processor on the same core */
|
||||
cores[core_index].processor_count++;
|
||||
}
|
||||
|
||||
if (apic_cluster_id != last_apic_cluster_id) {
|
||||
/* new cluster */
|
||||
clusters[cluster_index].processor_start = processor_index;
|
||||
clusters[cluster_index].processor_count = 1;
|
||||
clusters[cluster_index].core_start = core_index;
|
||||
clusters[cluster_index].cluster_id = cluster_id;
|
||||
clusters[cluster_index].package = packages + package_index;
|
||||
clusters[cluster_index].vendor = x86_processor.vendor;
|
||||
clusters[cluster_index].uarch = x86_processor.uarch;
|
||||
clusters[cluster_index].cpuid = x86_processor.cpuid;
|
||||
packages[package_index].cluster_count += 1;
|
||||
last_apic_cluster_id = apic_cluster_id;
|
||||
} else {
|
||||
/* another logical processor on the same cluster */
|
||||
clusters[cluster_index].processor_count++;
|
||||
}
|
||||
|
||||
if (apic_package_id != last_apic_package_id) {
|
||||
/* new package */
|
||||
packages[package_index].processor_start = processor_index;
|
||||
packages[package_index].processor_count = 1;
|
||||
packages[package_index].core_start = core_index;
|
||||
packages[package_index].cluster_start = cluster_index;
|
||||
cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[package_index].name);
|
||||
last_apic_package_id = apic_package_id;
|
||||
} else {
|
||||
/* another logical processor on the same package */
|
||||
packages[package_index].processor_count++;
|
||||
}
|
||||
|
||||
linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index;
|
||||
linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index;
|
||||
|
||||
if (x86_processor.cache.l1i.size != 0) {
|
||||
const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
|
||||
processors[i].cache.l1i = &l1i[l1i_index];
|
||||
if (l1i_id != last_l1i_id) {
|
||||
/* new cache */
|
||||
last_l1i_id = l1i_id;
|
||||
l1i[++l1i_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l1i.size,
|
||||
.associativity = x86_processor.cache.l1i.associativity,
|
||||
.sets = x86_processor.cache.l1i.sets,
|
||||
.partitions = x86_processor.cache.l1i.partitions,
|
||||
.line_size = x86_processor.cache.l1i.line_size,
|
||||
.flags = x86_processor.cache.l1i.flags,
|
||||
.processor_start = processor_index,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l1i[l1i_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l1i = &l1i[l1i_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l1i_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l1d.size != 0) {
|
||||
const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits);
|
||||
processors[i].cache.l1d = &l1d[l1d_index];
|
||||
if (l1d_id != last_l1d_id) {
|
||||
/* new cache */
|
||||
last_l1d_id = l1d_id;
|
||||
l1d[++l1d_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l1d.size,
|
||||
.associativity = x86_processor.cache.l1d.associativity,
|
||||
.sets = x86_processor.cache.l1d.sets,
|
||||
.partitions = x86_processor.cache.l1d.partitions,
|
||||
.line_size = x86_processor.cache.l1d.line_size,
|
||||
.flags = x86_processor.cache.l1d.flags,
|
||||
.processor_start = processor_index,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l1d[l1d_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l1d = &l1d[l1d_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l1d_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l2.size != 0) {
|
||||
const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits);
|
||||
processors[i].cache.l2 = &l2[l2_index];
|
||||
if (l2_id != last_l2_id) {
|
||||
/* new cache */
|
||||
last_l2_id = l2_id;
|
||||
l2[++l2_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l2.size,
|
||||
.associativity = x86_processor.cache.l2.associativity,
|
||||
.sets = x86_processor.cache.l2.sets,
|
||||
.partitions = x86_processor.cache.l2.partitions,
|
||||
.line_size = x86_processor.cache.l2.line_size,
|
||||
.flags = x86_processor.cache.l2.flags,
|
||||
.processor_start = processor_index,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l2[l2_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l2 = &l2[l2_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l2_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l3.size != 0) {
|
||||
const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits);
|
||||
processors[i].cache.l3 = &l3[l3_index];
|
||||
if (l3_id != last_l3_id) {
|
||||
/* new cache */
|
||||
last_l3_id = l3_id;
|
||||
l3[++l3_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l3.size,
|
||||
.associativity = x86_processor.cache.l3.associativity,
|
||||
.sets = x86_processor.cache.l3.sets,
|
||||
.partitions = x86_processor.cache.l3.partitions,
|
||||
.line_size = x86_processor.cache.l3.line_size,
|
||||
.flags = x86_processor.cache.l3.flags,
|
||||
.processor_start = processor_index,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l3[l3_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l3 = &l3[l3_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l3_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l4.size != 0) {
|
||||
const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits);
|
||||
processors[i].cache.l4 = &l4[l4_index];
|
||||
if (l4_id != last_l4_id) {
|
||||
/* new cache */
|
||||
last_l4_id = l4_id;
|
||||
l4[++l4_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l4.size,
|
||||
.associativity = x86_processor.cache.l4.associativity,
|
||||
.sets = x86_processor.cache.l4.sets,
|
||||
.partitions = x86_processor.cache.l4.partitions,
|
||||
.line_size = x86_processor.cache.l4.line_size,
|
||||
.flags = x86_processor.cache.l4.flags,
|
||||
.processor_start = processor_index,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l4[l4_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l4 = &l4[l4_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l4_id = UINT32_MAX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Commit changes */
|
||||
cpuinfo_processors = processors;
|
||||
cpuinfo_cores = cores;
|
||||
cpuinfo_clusters = clusters;
|
||||
cpuinfo_packages = packages;
|
||||
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
|
||||
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
|
||||
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
|
||||
cpuinfo_cache[cpuinfo_cache_level_3] = l3;
|
||||
cpuinfo_cache[cpuinfo_cache_level_4] = l4;
|
||||
|
||||
cpuinfo_processors_count = processors_count;
|
||||
cpuinfo_cores_count = cores_count;
|
||||
cpuinfo_clusters_count = clusters_count;
|
||||
cpuinfo_packages_count = packages_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
|
||||
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
|
||||
|
||||
cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
|
||||
.uarch = x86_processor.uarch,
|
||||
.cpuid = x86_processor.cpuid,
|
||||
.processor_count = processors_count,
|
||||
.core_count = cores_count,
|
||||
};
|
||||
|
||||
cpuinfo_linux_cpu_max = x86_linux_processors_count;
|
||||
cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
|
||||
cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
|
||||
|
||||
__sync_synchronize();
|
||||
|
||||
cpuinfo_is_initialized = true;
|
||||
|
||||
processors = NULL;
|
||||
cores = NULL;
|
||||
clusters = NULL;
|
||||
packages = NULL;
|
||||
l1i = l1d = l2 = l3 = l4 = NULL;
|
||||
linux_cpu_to_processor_map = NULL;
|
||||
linux_cpu_to_core_map = NULL;
|
||||
|
||||
cleanup:
|
||||
free(x86_linux_processors);
|
||||
free(processors);
|
||||
free(cores);
|
||||
free(clusters);
|
||||
free(packages);
|
||||
free(l1i);
|
||||
free(l1d);
|
||||
free(l2);
|
||||
free(l3);
|
||||
free(l4);
|
||||
free(linux_cpu_to_processor_map);
|
||||
free(linux_cpu_to_core_map);
|
||||
}
|
356
dep/cpuinfo/src/x86/mach/init.c
Normal file
356
dep/cpuinfo/src/x86/mach/init.c
Normal file
@ -0,0 +1,356 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
#include <mach/api.h>
|
||||
#include <cpuinfo/internal-api.h>
|
||||
#include <cpuinfo/log.h>
|
||||
|
||||
|
||||
/* Returns the larger of two unsigned 32-bit values. */
static inline uint32_t max(uint32_t a, uint32_t b) {
	if (a < b) {
		return b;
	}
	return a;
}
|
||||
|
||||
/*
 * Returns a mask with the low `bits` bits set (e.g. bit_mask(3) == 0x7).
 * Guarded for bits >= 32: the plain (1u << bits) form is undefined behavior
 * when the shift count equals or exceeds the type width (C11 6.5.7).
 */
static inline uint32_t bit_mask(uint32_t bits) {
	if (bits >= 32) {
		return UINT32_MAX;
	}
	return (UINT32_C(1) << bits) - UINT32_C(1);
}
|
||||
|
||||
void cpuinfo_x86_mach_init(void) {
|
||||
struct cpuinfo_processor* processors = NULL;
|
||||
struct cpuinfo_core* cores = NULL;
|
||||
struct cpuinfo_cluster* clusters = NULL;
|
||||
struct cpuinfo_package* packages = NULL;
|
||||
struct cpuinfo_cache* l1i = NULL;
|
||||
struct cpuinfo_cache* l1d = NULL;
|
||||
struct cpuinfo_cache* l2 = NULL;
|
||||
struct cpuinfo_cache* l3 = NULL;
|
||||
struct cpuinfo_cache* l4 = NULL;
|
||||
|
||||
struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology();
|
||||
processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor));
|
||||
if (processors == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
|
||||
mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads);
|
||||
goto cleanup;
|
||||
}
|
||||
cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core));
|
||||
if (cores == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
|
||||
mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores);
|
||||
goto cleanup;
|
||||
}
|
||||
/* On x86 cluster of cores is a physical package */
|
||||
clusters = calloc(mach_topology.packages, sizeof(struct cpuinfo_cluster));
|
||||
if (clusters == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
|
||||
mach_topology.packages * sizeof(struct cpuinfo_cluster), mach_topology.packages);
|
||||
goto cleanup;
|
||||
}
|
||||
packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package));
|
||||
if (packages == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
|
||||
mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
struct cpuinfo_x86_processor x86_processor;
|
||||
memset(&x86_processor, 0, sizeof(x86_processor));
|
||||
cpuinfo_x86_init_processor(&x86_processor);
|
||||
char brand_string[48];
|
||||
cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
|
||||
|
||||
const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
|
||||
const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
|
||||
const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
|
||||
for (uint32_t i = 0; i < mach_topology.packages; i++) {
|
||||
clusters[i] = (struct cpuinfo_cluster) {
|
||||
.processor_start = i * threads_per_package,
|
||||
.processor_count = threads_per_package,
|
||||
.core_start = i * cores_per_package,
|
||||
.core_count = cores_per_package,
|
||||
.cluster_id = 0,
|
||||
.package = packages + i,
|
||||
.vendor = x86_processor.vendor,
|
||||
.uarch = x86_processor.uarch,
|
||||
.cpuid = x86_processor.cpuid,
|
||||
};
|
||||
packages[i].processor_start = i * threads_per_package;
|
||||
packages[i].processor_count = threads_per_package;
|
||||
packages[i].core_start = i * cores_per_package;
|
||||
packages[i].core_count = cores_per_package;
|
||||
packages[i].cluster_start = i;
|
||||
packages[i].cluster_count = 1;
|
||||
cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[i].name);
|
||||
}
|
||||
for (uint32_t i = 0; i < mach_topology.cores; i++) {
|
||||
cores[i] = (struct cpuinfo_core) {
|
||||
.processor_start = i * threads_per_core,
|
||||
.processor_count = threads_per_core,
|
||||
.core_id = i % cores_per_package,
|
||||
.cluster = clusters + i / cores_per_package,
|
||||
.package = packages + i / cores_per_package,
|
||||
.vendor = x86_processor.vendor,
|
||||
.uarch = x86_processor.uarch,
|
||||
.cpuid = x86_processor.cpuid,
|
||||
};
|
||||
}
|
||||
for (uint32_t i = 0; i < mach_topology.threads; i++) {
|
||||
const uint32_t smt_id = i % threads_per_core;
|
||||
const uint32_t core_id = i / threads_per_core;
|
||||
const uint32_t package_id = i / threads_per_package;
|
||||
|
||||
/* Reconstruct APIC IDs from topology components */
|
||||
const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
|
||||
const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
|
||||
const uint32_t package_bits_offset = max(
|
||||
x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
|
||||
x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
|
||||
const uint32_t apic_id =
|
||||
((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
|
||||
((core_id & core_bits_mask) << x86_processor.topology.core_bits_offset) |
|
||||
(package_id << package_bits_offset);
|
||||
cpuinfo_log_debug("reconstructed APIC ID 0x%08"PRIx32" for thread %"PRIu32, apic_id, i);
|
||||
|
||||
processors[i].smt_id = smt_id;
|
||||
processors[i].core = cores + i / threads_per_core;
|
||||
processors[i].cluster = clusters + i / threads_per_package;
|
||||
processors[i].package = packages + i / threads_per_package;
|
||||
processors[i].apic_id = apic_id;
|
||||
}
|
||||
|
||||
uint32_t threads_per_l1 = 0, l1_count = 0;
|
||||
if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
|
||||
threads_per_l1 = mach_topology.threads_per_cache[1];
|
||||
if (threads_per_l1 == 0) {
|
||||
/* Assume that threads on the same core share L1 */
|
||||
threads_per_l1 = mach_topology.threads / mach_topology.cores;
|
||||
cpuinfo_log_warning("Mach kernel did not report number of threads sharing L1 cache; assume %"PRIu32,
|
||||
threads_per_l1);
|
||||
}
|
||||
l1_count = mach_topology.threads / threads_per_l1;
|
||||
cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count);
|
||||
}
|
||||
|
||||
uint32_t threads_per_l2 = 0, l2_count = 0;
|
||||
if (x86_processor.cache.l2.size != 0) {
|
||||
threads_per_l2 = mach_topology.threads_per_cache[2];
|
||||
if (threads_per_l2 == 0) {
|
||||
if (x86_processor.cache.l3.size != 0) {
|
||||
/* This is not a last-level cache; assume that threads on the same core share L2 */
|
||||
threads_per_l2 = mach_topology.threads / mach_topology.cores;
|
||||
} else {
|
||||
/* This is a last-level cache; assume that threads on the same package share L2 */
|
||||
threads_per_l2 = mach_topology.threads / mach_topology.packages;
|
||||
}
|
||||
cpuinfo_log_warning("Mach kernel did not report number of threads sharing L2 cache; assume %"PRIu32,
|
||||
threads_per_l2);
|
||||
}
|
||||
l2_count = mach_topology.threads / threads_per_l2;
|
||||
cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
|
||||
}
|
||||
|
||||
uint32_t threads_per_l3 = 0, l3_count = 0;
|
||||
if (x86_processor.cache.l3.size != 0) {
|
||||
threads_per_l3 = mach_topology.threads_per_cache[3];
|
||||
if (threads_per_l3 == 0) {
|
||||
/*
|
||||
* Assume that threads on the same package share L3.
|
||||
* However, is it not necessarily the last-level cache (there may be L4 cache as well)
|
||||
*/
|
||||
threads_per_l3 = mach_topology.threads / mach_topology.packages;
|
||||
cpuinfo_log_warning("Mach kernel did not report number of threads sharing L3 cache; assume %"PRIu32,
|
||||
threads_per_l3);
|
||||
}
|
||||
l3_count = mach_topology.threads / threads_per_l3;
|
||||
cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
|
||||
}
|
||||
|
||||
uint32_t threads_per_l4 = 0, l4_count = 0;
|
||||
if (x86_processor.cache.l4.size != 0) {
|
||||
threads_per_l4 = mach_topology.threads_per_cache[4];
|
||||
if (threads_per_l4 == 0) {
|
||||
/*
|
||||
* Assume that all threads share this L4.
|
||||
* As of now, L4 cache exists only on notebook x86 CPUs, which are single-package,
|
||||
* but multi-socket systems could have shared L4 (like on IBM POWER8).
|
||||
*/
|
||||
threads_per_l4 = mach_topology.threads;
|
||||
cpuinfo_log_warning("Mach kernel did not report number of threads sharing L4 cache; assume %"PRIu32,
|
||||
threads_per_l4);
|
||||
}
|
||||
l4_count = mach_topology.threads / threads_per_l4;
|
||||
cpuinfo_log_debug("detected %"PRIu32" L4 caches", l4_count);
|
||||
}
|
||||
|
||||
if (x86_processor.cache.l1i.size != 0) {
|
||||
l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
|
||||
if (l1i == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
|
||||
l1_count * sizeof(struct cpuinfo_cache), l1_count);
|
||||
return;
|
||||
}
|
||||
for (uint32_t c = 0; c < l1_count; c++) {
|
||||
l1i[c] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l1i.size,
|
||||
.associativity = x86_processor.cache.l1i.associativity,
|
||||
.sets = x86_processor.cache.l1i.sets,
|
||||
.partitions = x86_processor.cache.l1i.partitions,
|
||||
.line_size = x86_processor.cache.l1i.line_size,
|
||||
.flags = x86_processor.cache.l1i.flags,
|
||||
.processor_start = c * threads_per_l1,
|
||||
.processor_count = threads_per_l1,
|
||||
};
|
||||
}
|
||||
for (uint32_t t = 0; t < mach_topology.threads; t++) {
|
||||
processors[t].cache.l1i = &l1i[t / threads_per_l1];
|
||||
}
|
||||
}
|
||||
|
||||
if (x86_processor.cache.l1d.size != 0) {
|
||||
l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
|
||||
if (l1d == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
|
||||
l1_count * sizeof(struct cpuinfo_cache), l1_count);
|
||||
return;
|
||||
}
|
||||
for (uint32_t c = 0; c < l1_count; c++) {
|
||||
l1d[c] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l1d.size,
|
||||
.associativity = x86_processor.cache.l1d.associativity,
|
||||
.sets = x86_processor.cache.l1d.sets,
|
||||
.partitions = x86_processor.cache.l1d.partitions,
|
||||
.line_size = x86_processor.cache.l1d.line_size,
|
||||
.flags = x86_processor.cache.l1d.flags,
|
||||
.processor_start = c * threads_per_l1,
|
||||
.processor_count = threads_per_l1,
|
||||
};
|
||||
}
|
||||
for (uint32_t t = 0; t < mach_topology.threads; t++) {
|
||||
processors[t].cache.l1d = &l1d[t / threads_per_l1];
|
||||
}
|
||||
}
|
||||
|
||||
if (l2_count != 0) {
|
||||
l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
|
||||
if (l2 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
|
||||
l2_count * sizeof(struct cpuinfo_cache), l2_count);
|
||||
return;
|
||||
}
|
||||
for (uint32_t c = 0; c < l2_count; c++) {
|
||||
l2[c] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l2.size,
|
||||
.associativity = x86_processor.cache.l2.associativity,
|
||||
.sets = x86_processor.cache.l2.sets,
|
||||
.partitions = x86_processor.cache.l2.partitions,
|
||||
.line_size = x86_processor.cache.l2.line_size,
|
||||
.flags = x86_processor.cache.l2.flags,
|
||||
.processor_start = c * threads_per_l2,
|
||||
.processor_count = threads_per_l2,
|
||||
};
|
||||
}
|
||||
for (uint32_t t = 0; t < mach_topology.threads; t++) {
|
||||
processors[t].cache.l2 = &l2[t / threads_per_l2];
|
||||
}
|
||||
}
|
||||
|
||||
if (l3_count != 0) {
|
||||
l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
|
||||
if (l3 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
|
||||
l3_count * sizeof(struct cpuinfo_cache), l3_count);
|
||||
return;
|
||||
}
|
||||
for (uint32_t c = 0; c < l3_count; c++) {
|
||||
l3[c] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l3.size,
|
||||
.associativity = x86_processor.cache.l3.associativity,
|
||||
.sets = x86_processor.cache.l3.sets,
|
||||
.partitions = x86_processor.cache.l3.partitions,
|
||||
.line_size = x86_processor.cache.l3.line_size,
|
||||
.flags = x86_processor.cache.l3.flags,
|
||||
.processor_start = c * threads_per_l3,
|
||||
.processor_count = threads_per_l3,
|
||||
};
|
||||
}
|
||||
for (uint32_t t = 0; t < mach_topology.threads; t++) {
|
||||
processors[t].cache.l3 = &l3[t / threads_per_l3];
|
||||
}
|
||||
}
|
||||
|
||||
if (l4_count != 0) {
|
||||
l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
|
||||
if (l4 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches",
|
||||
l4_count * sizeof(struct cpuinfo_cache), l4_count);
|
||||
return;
|
||||
}
|
||||
for (uint32_t c = 0; c < l4_count; c++) {
|
||||
l4[c] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l4.size,
|
||||
.associativity = x86_processor.cache.l4.associativity,
|
||||
.sets = x86_processor.cache.l4.sets,
|
||||
.partitions = x86_processor.cache.l4.partitions,
|
||||
.line_size = x86_processor.cache.l4.line_size,
|
||||
.flags = x86_processor.cache.l4.flags,
|
||||
.processor_start = c * threads_per_l4,
|
||||
.processor_count = threads_per_l4,
|
||||
};
|
||||
}
|
||||
for (uint32_t t = 0; t < mach_topology.threads; t++) {
|
||||
processors[t].cache.l4 = &l4[t / threads_per_l4];
|
||||
}
|
||||
}
|
||||
|
||||
/* Commit changes */
|
||||
cpuinfo_processors = processors;
|
||||
cpuinfo_cores = cores;
|
||||
cpuinfo_clusters = clusters;
|
||||
cpuinfo_packages = packages;
|
||||
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
|
||||
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
|
||||
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
|
||||
cpuinfo_cache[cpuinfo_cache_level_3] = l3;
|
||||
cpuinfo_cache[cpuinfo_cache_level_4] = l4;
|
||||
|
||||
cpuinfo_processors_count = mach_topology.threads;
|
||||
cpuinfo_cores_count = mach_topology.cores;
|
||||
cpuinfo_clusters_count = mach_topology.packages;
|
||||
cpuinfo_packages_count = mach_topology.packages;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
|
||||
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
|
||||
|
||||
cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
|
||||
.uarch = x86_processor.uarch,
|
||||
.cpuid = x86_processor.cpuid,
|
||||
.processor_count = mach_topology.threads,
|
||||
.core_count = mach_topology.cores,
|
||||
};
|
||||
|
||||
__sync_synchronize();
|
||||
|
||||
cpuinfo_is_initialized = true;
|
||||
|
||||
processors = NULL;
|
||||
cores = NULL;
|
||||
clusters = NULL;
|
||||
packages = NULL;
|
||||
l1i = l1d = l2 = l3 = l4 = NULL;
|
||||
|
||||
cleanup:
|
||||
free(processors);
|
||||
free(cores);
|
||||
free(clusters);
|
||||
free(packages);
|
||||
free(l1i);
|
||||
free(l1d);
|
||||
free(l2);
|
||||
free(l3);
|
||||
free(l4);
|
||||
}
|
70
dep/cpuinfo/src/x86/mockcpuid.c
Normal file
70
dep/cpuinfo/src/x86/mockcpuid.c
Normal file
@ -0,0 +1,70 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#if !CPUINFO_MOCK
|
||||
#error This file should be built only in mock mode
|
||||
#endif
|
||||
|
||||
#include <cpuinfo-mock.h>
|
||||
|
||||
|
||||
static struct cpuinfo_mock_cpuid* cpuinfo_mock_cpuid_data = NULL;
|
||||
static uint32_t cpuinfo_mock_cpuid_entries = 0;
|
||||
static uint32_t cpuinfo_mock_cpuid_leaf4_iteration = 0;
|
||||
|
||||
/**
 * Installs a CPUID dump to be replayed by cpuinfo_mock_get_cpuid and
 * cpuinfo_mock_get_cpuidex.
 *
 * @param dump    Array of recorded CPUID entries. The caller retains ownership;
 *                the array must remain valid while mocking is in effect.
 * @param entries Number of elements in @p dump.
 */
void CPUINFO_ABI cpuinfo_mock_set_cpuid(struct cpuinfo_mock_cpuid* dump, size_t entries) {
	cpuinfo_mock_cpuid_data = dump;
	/* Entry count is stored as uint32_t; make the narrowing conversion explicit. */
	cpuinfo_mock_cpuid_entries = (uint32_t) entries;
}
|
||||
|
||||
/**
 * Replays a recorded CPUID result for the requested leaf.
 * Leaf 4 is stateful: consecutive calls with eax == 4 return consecutive
 * matching dump entries (one per cache level); any other leaf resets that
 * iteration. If no dump is installed or no entry matches, all four output
 * registers are zeroed.
 */
void CPUINFO_ABI cpuinfo_mock_get_cpuid(uint32_t eax, uint32_t regs[4]) {
	/* Any leaf other than 4 restarts the leaf-4 sub-entry iteration. */
	if (eax != 4) {
		cpuinfo_mock_cpuid_leaf4_iteration = 0;
	}
	if (cpuinfo_mock_cpuid_data != NULL && cpuinfo_mock_cpuid_entries != 0) {
		if (eax == 4) {
			/* Skip as many matching entries as were already returned. */
			uint32_t remaining_skips = cpuinfo_mock_cpuid_leaf4_iteration;
			for (uint32_t index = 0; index < cpuinfo_mock_cpuid_entries; index++) {
				const struct cpuinfo_mock_cpuid* entry = &cpuinfo_mock_cpuid_data[index];
				if (entry->input_eax != eax) {
					continue;
				}
				if (remaining_skips-- != 0) {
					continue;
				}
				regs[0] = entry->eax;
				regs[1] = entry->ebx;
				regs[2] = entry->ecx;
				regs[3] = entry->edx;
				cpuinfo_mock_cpuid_leaf4_iteration++;
				return;
			}
		} else {
			/* Stateless leaves: return the first entry with matching input eax. */
			for (uint32_t index = 0; index < cpuinfo_mock_cpuid_entries; index++) {
				const struct cpuinfo_mock_cpuid* entry = &cpuinfo_mock_cpuid_data[index];
				if (entry->input_eax == eax) {
					regs[0] = entry->eax;
					regs[1] = entry->ebx;
					regs[2] = entry->ecx;
					regs[3] = entry->edx;
					return;
				}
			}
		}
	}
	/* No dump installed, or no entry matched: report all-zero registers. */
	regs[0] = regs[1] = regs[2] = regs[3] = 0;
}
|
||||
|
||||
/**
 * Replays a recorded CPUID result for the requested (leaf, sub-leaf) pair.
 * Also resets the leaf-4 iteration state used by cpuinfo_mock_get_cpuid.
 * If no dump is installed or no entry matches, all four output registers
 * are zeroed.
 */
void CPUINFO_ABI cpuinfo_mock_get_cpuidex(uint32_t eax, uint32_t ecx, uint32_t regs[4]) {
	cpuinfo_mock_cpuid_leaf4_iteration = 0;
	if (cpuinfo_mock_cpuid_data != NULL && cpuinfo_mock_cpuid_entries != 0) {
		for (uint32_t index = 0; index < cpuinfo_mock_cpuid_entries; index++) {
			const struct cpuinfo_mock_cpuid* entry = &cpuinfo_mock_cpuid_data[index];
			if (entry->input_eax == eax && entry->input_ecx == ecx) {
				regs[0] = entry->eax;
				regs[1] = entry->ebx;
				regs[2] = entry->ecx;
				regs[3] = entry->edx;
				return;
			}
		}
	}
	/* No matching (eax, ecx) entry in the dump: report all-zero registers. */
	regs[0] = regs[1] = regs[2] = regs[3] = 0;
}
|
708
dep/cpuinfo/src/x86/name.c
Normal file
708
dep/cpuinfo/src/x86/name.c
Normal file
@ -0,0 +1,708 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <cpuinfo/common.h>
|
||||
#include <x86/api.h>
|
||||
|
||||
|
||||
/*
 * The state of the brand-string parser, preserved between parsing different
 * tokens. The context_* pointers form a one-token lookbehind: each points at
 * the start of the PREVIOUS token only when that token matched the described
 * word, and is NULL otherwise.
 */
struct parser_state {
	/*
	 * Pointer to the start of the previous token if it is "model".
	 * NULL if previous token is not "model".
	 */
	char* context_model;
	/*
	 * Pointer to the start of the previous token if it is a single-uppercase-letter token.
	 * NULL if previous token is anything different.
	 */
	char* context_upper_letter;
	/*
	 * Pointer to the start of the previous token if it is "Dual".
	 * NULL if previous token is not "Dual".
	 */
	char* context_dual;
	/*
	 * Pointer to the start of the previous token if it is "Core", "Dual-Core", "QuadCore", etc.
	 * NULL if previous token is anything different.
	 */
	char* context_core;
	/*
	 * Pointer to the start of the previous token if it is "Eng" or "Engineering", etc.
	 * NULL if previous token is anything different.
	 */
	char* context_engineering;
	/*
	 * Pointer to the '@' symbol in the brand string (separates frequency specification).
	 * NULL if there is no '@' symbol.
	 */
	char* frequency_separator;
	/* Indicates whether the brand string (after transformations) contains frequency. */
	bool frequency_token;
	/* Indicates whether the processor is of Xeon family (contains "Xeon" substring). */
	bool xeon;
	/* Indicates whether the processor model number was already parsed. */
	bool parsed_model_number;
	/* Indicates whether the processor is an engineering sample (contains "Engineering Sample" or "Eng Sample" substrings). */
	bool engineering_sample;
};
|
||||
|
||||
/** @brief Resets information about the previous token. Keeps all other state information. */
|
||||
static void reset_context(struct parser_state* state) {
|
||||
state->context_model = NULL;
|
||||
state->context_upper_letter = NULL;
|
||||
state->context_dual = NULL;
|
||||
state->context_core = NULL;
|
||||
}
|
||||
|
||||
/**
 * @brief Overwrites the supplied string with space characters if it exactly matches the given string.
 * @param string The string to be compared against the other string, and erased in case of a match.
 * @param length The length of the two strings to be compared against each other.
 * @param target The string to compare against.
 * @retval true If the two strings match and the first supplied string was erased (overwritten with space characters).
 * @retval false If the two strings are different and the first supplied string remained unchanged.
 */
static inline bool erase_matching(char* string, size_t length, const char* target) {
	if (memcmp(string, target, length) != 0) {
		return false;
	}
	memset(string, ' ', length);
	return true;
}
|
||||
|
||||
/**
 * @brief Checks if the supplied ASCII character is an uppercase latin letter.
 * @param character The character to analyse.
 * @retval true If the supplied character is an uppercase latin letter ('A' to 'Z').
 * @retval false If the supplied character is anything different.
 */
static inline bool is_upper_letter(char character) {
	return character >= 'A' && character <= 'Z';
}
|
||||
|
||||
/**
 * @brief Checks if the supplied ASCII character is a digit.
 * @param character The character to analyse.
 * @retval true If the supplied character is a digit ('0' to '9').
 * @retval false If the supplied character is anything different.
 */
static inline bool is_digit(char character) {
	return character >= '0' && character <= '9';
}
|
||||
|
||||
/* Returns true when every character in [token_start, token_end) is '0'
 * (vacuously true for an empty range). */
static inline bool is_zero_number(const char* token_start, const char* token_end) {
	const char* cursor = token_start;
	while (cursor != token_end) {
		if (*cursor++ != '0') {
			return false;
		}
	}
	return true;
}
|
||||
|
||||
/* Returns true when every character in [token_start, token_end) is a space
 * (vacuously true for an empty range). */
static inline bool is_space(const char* token_start, const char* token_end) {
	const char* cursor = token_start;
	while (cursor != token_end) {
		if (*cursor++ != ' ') {
			return false;
		}
	}
	return true;
}
|
||||
|
||||
/* Returns true when every character in [token_start, token_end) is a decimal
 * digit (vacuously true for an empty range). */
static inline bool is_number(const char* token_start, const char* token_end) {
	for (const char* cursor = token_start; cursor != token_end; cursor++) {
		const char character = *cursor;
		if (character < '0' || character > '9') {
			return false;
		}
	}
	return true;
}
|
||||
|
||||
/* Returns true when the token contains at least two consecutive decimal
 * digits anywhere inside [token_start, token_end). */
static inline bool is_model_number(const char* token_start, const char* token_end) {
	for (const char* cursor = token_start + 1; cursor < token_end; cursor++) {
		const char previous = cursor[-1];
		const char current = cursor[0];
		if (previous >= '0' && previous <= '9' && current >= '0' && current <= '9') {
			return true;
		}
	}
	return false;
}
|
||||
|
||||
/* Returns true when the token looks like a frequency specification:
 * longer than 3 characters and ending in "KHz", "MHz" or "GHz". */
static inline bool is_frequency(const char* token_start, const char* token_end) {
	const size_t token_length = (size_t) (token_end - token_start);
	if (token_length <= 3 || token_end[-2] != 'H' || token_end[-1] != 'z') {
		return false;
	}
	const char unit_prefix = token_end[-3];
	return unit_prefix == 'K' || unit_prefix == 'M' || unit_prefix == 'G';
}
|
||||
|
||||
/**
 * Copies the token [token_start, token_end) to output_ptr and returns the
 * position just past the copied token.
 *
 * @warning Input and output tokens can overlap (memmove is used for that reason).
 */
static inline char* move_token(const char* token_start, const char* token_end, char* output_ptr) {
	const size_t token_length = (size_t) (token_end - token_start);
	return (char*) memmove(output_ptr, token_start, token_length) + token_length;
}
|
||||
|
||||
/**
 * Normalizes a single space-delimited token of the brand string, in place.
 *
 * Depending on the token and on one-token lookbehind context in @p state, the
 * token may be erased (overwritten with spaces), have prefixes/suffixes
 * stripped, or be merged with the previous token.
 *
 * @param token_start Pointer to the first character of the token.
 * @param token_end   Pointer one past the last character of the token.
 * @param state       Parser state carried across tokens; the lookbehind
 *                    context is reset here and re-established for the caller's
 *                    next invocation.
 * @retval true  Continue processing subsequent tokens.
 * @retval false Stop processing; the caller discards everything after this token.
 */
static bool transform_token(char* token_start, char* token_end, struct parser_state* state) {
	const struct parser_state previousState = *state;
	reset_context(state);

	size_t token_length = (size_t) (token_end - token_start);

	/* Once the model number was parsed, erase everything after the '@' separator. */
	if (state->frequency_separator != NULL) {
		if (token_start > state->frequency_separator) {
			if (state->parsed_model_number) {
				memset(token_start, ' ', token_length);
			}
		}
	}

	/* Early AMD and Cyrix processors have "tm" suffix for trademark, e.g.
	 * "AMD-K6tm w/ multimedia extensions"
	 * "Cyrix MediaGXtm MMXtm Enhanced"
	 */
	if (token_length > 2) {
		const char context_char = token_end[-3];
		if (is_digit(context_char) || is_upper_letter(context_char)) {
			if (erase_matching(token_end - 2, 2, "tm")) {
				token_end -= 2;
				token_length -= 2;
			}
		}
	}
	if (token_length > 4) {
		/* Some early AMD CPUs have "AMD-" at the beginning, e.g.
		 * "AMD-K5(tm) Processor"
		 * "AMD-K6tm w/ multimedia extensions"
		 * "AMD-K6(tm) 3D+ Processor"
		 * "AMD-K6(tm)-III Processor"
		 */
		if (erase_matching(token_start, 4, "AMD-")) {
			token_start += 4;
			token_length -= 4;
		}
	}
	switch (token_length) {
		case 1:
			/*
			 * On some Intel processors there is a space between the first letter of
			 * the name and the number after it, e.g.
			 * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz"
			 * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz"
			 * We want to merge these parts together, in reverse order, i.e. "X 990" -> "990X", "820" -> "820Q"
			 */
			if (is_upper_letter(token_start[0])) {
				state->context_upper_letter = token_start;
				return true;
			}
			break;
		case 2:
			/* Erase everything after "w/" in "AMD-K6tm w/ multimedia extensions" */
			if (erase_matching(token_start, token_length, "w/")) {
				return false;
			}
			/*
			 * Intel Xeon processors since Ivy Bridge use versions, e.g.
			 * "Intel Xeon E3-1230 v2"
			 * Some processor brand strings report them as "V<N>", others report as "v<N>".
			 * Normalize the former (upper-case) to the latter (lower-case) version
			 */
			if (token_start[0] == 'V' && is_digit(token_start[1])) {
				token_start[0] = 'v';
				return true;
			}
			break;
		case 3:
			/*
			 * Erase "CPU" in brand string on Intel processors, e.g.
			 * "Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz"
			 * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz"
			 * "Intel(R) Atom(TM) CPU Z2760 @ 1.80GHz"
			 */
			if (erase_matching(token_start, token_length, "CPU")) {
				return true;
			}
			/*
			 * Erase everything after "SOC" on AMD System-on-Chips, e.g.
			 * "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0"
			 */
			if (erase_matching(token_start, token_length, "SOC")) {
				return false;
			}
			/*
			 * Erase "AMD" in brand string on AMD processors, e.g.
			 * "AMD Athlon(tm) Processor"
			 * "AMD Engineering Sample"
			 * "Quad-Core AMD Opteron(tm) Processor 2344 HE"
			 */
			if (erase_matching(token_start, token_length, "AMD")) {
				return true;
			}
			/*
			 * Erase "VIA" in brand string on VIA processors, e.g.
			 * "VIA C3 Ezra"
			 * "VIA C7-M Processor 1200MHz"
			 * "VIA Nano L3050@1800MHz"
			 */
			if (erase_matching(token_start, token_length, "VIA")) {
				return true;
			}
			/* Erase "IDT" in brand string on early Centaur processors, e.g. "IDT WinChip 2-3D" */
			if (erase_matching(token_start, token_length, "IDT")) {
				return true;
			}
			/*
			 * Erase everything starting with "MMX" in
			 * "Cyrix MediaGXtm MMXtm Enhanced" ("tm" suffix is removed by this point)
			 */
			if (erase_matching(token_start, token_length, "MMX")) {
				return false;
			}
			/*
			 * Erase everything starting with "APU" on AMD processors, e.g.
			 * "AMD A10-4600M APU with Radeon(tm) HD Graphics"
			 * "AMD A10-7850K APU with Radeon(TM) R7 Graphics"
			 * "AMD A6-6310 APU with AMD Radeon R4 Graphics"
			 */
			if (erase_matching(token_start, token_length, "APU")) {
				return false;
			}
			/*
			 * Remember to discard string if it contains "Eng Sample",
			 * e.g. "Eng Sample, ZD302046W4K43_36/30/20_2/8_A"
			 */
			if (memcmp(token_start, "Eng", token_length) == 0) {
				state->context_engineering = token_start;
			}
			break;
		case 4:
			/* Remember to erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */
			if (memcmp(token_start, "Dual", token_length) == 0) {
				state->context_dual = token_start;
			}
			/* Remember if the processor is on Xeon family */
			if (memcmp(token_start, "Xeon", token_length) == 0) {
				state->xeon = true;
			}
			/* Erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */
			if (previousState.context_dual != NULL) {
				if (memcmp(token_start, "Core", token_length) == 0) {
					memset(previousState.context_dual, ' ', (size_t) (token_end - previousState.context_dual));
					state->context_core = token_end;
					return true;
				}
			}
			break;
		case 5:
			/*
			 * Erase "Intel" in brand string on Intel processors, e.g.
			 * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz"
			 * "Intel(R) Atom(TM) CPU D2700 @ 2.13GHz"
			 * "Genuine Intel(R) processor 800MHz"
			 */
			if (erase_matching(token_start, token_length, "Intel")) {
				return true;
			}
			/*
			 * Erase "Cyrix" in brand string on Cyrix processors, e.g.
			 * "Cyrix MediaGXtm MMXtm Enhanced"
			 */
			if (erase_matching(token_start, token_length, "Cyrix")) {
				return true;
			}
			/*
			 * Erase everything following "Geode" (but not "Geode" token itself) on Geode processors, e.g.
			 * "Geode(TM) Integrated Processor by AMD PCS"
			 * "Geode(TM) Integrated Processor by National Semi"
			 */
			if (memcmp(token_start, "Geode", token_length) == 0) {
				return false;
			}
			/* Remember to erase "model unknown" in "AMD Processor model unknown" */
			if (memcmp(token_start, "model", token_length) == 0) {
				state->context_model = token_start;
				return true;
			}
			break;
		case 6:
			/*
			 * Erase everything starting with "Radeon" or "RADEON" on AMD APUs, e.g.
			 * "A8-7670K Radeon R7, 10 Compute Cores 4C+6G"
			 * "FX-8800P Radeon R7, 12 Compute Cores 4C+8G"
			 * "A12-9800 RADEON R7, 12 COMPUTE CORES 4C+8G"
			 * "A9-9410 RADEON R5, 5 COMPUTE CORES 2C+3G"
			 */
			if (erase_matching(token_start, token_length, "Radeon") || erase_matching(token_start, token_length, "RADEON")) {
				return false;
			}
			/*
			 * Erase "Mobile" when it is not part of the processor name,
			 * e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82"
			 */
			if (previousState.context_core != NULL) {
				if (erase_matching(token_start, token_length, "Mobile")) {
					return true;
				}
			}
			/* Erase "family" in "Intel(R) Pentium(R) III CPU family 1266MHz" */
			if (erase_matching(token_start, token_length, "family")) {
				return true;
			}
			/* Discard the string if it contains "Engineering Sample" */
			if (previousState.context_engineering != NULL) {
				if (memcmp(token_start, "Sample", token_length) == 0) {
					state->engineering_sample = true;
					return false;
				}
			}
			break;
		case 7:
			/*
			 * Erase "Genuine" in brand string on Intel engineering samples, e.g.
			 * "Genuine Intel(R) processor 800MHz"
			 * "Genuine Intel(R) CPU @ 2.13GHz"
			 * "Genuine Intel(R) CPU 0000 @ 1.73GHz"
			 */
			if (erase_matching(token_start, token_length, "Genuine")) {
				return true;
			}
			/*
			 * Erase "12-Core" in brand string on AMD Threadripper, e.g.
			 * "AMD Ryzen Threadripper 1920X 12-Core Processor"
			 */
			if (erase_matching(token_start, token_length, "12-Core")) {
				return true;
			}
			/*
			 * Erase "16-Core" in brand string on AMD Threadripper, e.g.
			 * "AMD Ryzen Threadripper 1950X 16-Core Processor"
			 */
			if (erase_matching(token_start, token_length, "16-Core")) {
				return true;
			}
			/* Erase "model unknown" in "AMD Processor model unknown" */
			if (previousState.context_model != NULL) {
				if (memcmp(token_start, "unknown", token_length) == 0) {
					memset(previousState.context_model, ' ', token_end - previousState.context_model);
					return true;
				}
			}
			/*
			 * Discard the string if it contains "Eng Sample:" or "Eng Sample," e.g.
			 * "AMD Eng Sample, ZD302046W4K43_36/30/20_2/8_A"
			 * "AMD Eng Sample: 2D3151A2M88E4_35/31_N"
			 */
			if (previousState.context_engineering != NULL) {
				if (memcmp(token_start, "Sample,", token_length) == 0 || memcmp(token_start, "Sample:", token_length) == 0) {
					state->engineering_sample = true;
					return false;
				}
			}
			break;
		case 8:
			/* Erase "QuadCore" in "VIA QuadCore L4700 @ 1.2+ GHz" */
			if (erase_matching(token_start, token_length, "QuadCore")) {
				state->context_core = token_end;
				return true;
			}
			/* Erase "Six-Core" in "AMD FX(tm)-6100 Six-Core Processor" */
			if (erase_matching(token_start, token_length, "Six-Core")) {
				state->context_core = token_end;
				return true;
			}
			break;
		case 9:
			if (erase_matching(token_start, token_length, "Processor")) {
				return true;
			}
			if (erase_matching(token_start, token_length, "processor")) {
				return true;
			}
			/* Erase "Dual-Core" in "Pentium(R) Dual-Core CPU T4200 @ 2.00GHz" */
			if (erase_matching(token_start, token_length, "Dual-Core")) {
				state->context_core = token_end;
				return true;
			}
			/* Erase "Quad-Core" in AMD processors, e.g.
			 * "Quad-Core AMD Opteron(tm) Processor 2347 HE"
			 * "AMD FX(tm)-4170 Quad-Core Processor"
			 */
			if (erase_matching(token_start, token_length, "Quad-Core")) {
				state->context_core = token_end;
				return true;
			}
			/* Erase "Transmeta" in brand string on Transmeta processors, e.g.
			 * "Transmeta(tm) Crusoe(tm) Processor TM5800"
			 * "Transmeta Efficeon(tm) Processor TM8000"
			 */
			if (erase_matching(token_start, token_length, "Transmeta")) {
				return true;
			}
			break;
		case 10:
			/*
			 * Erase "Eight-Core" in AMD processors, e.g.
			 * "AMD FX(tm)-8150 Eight-Core Processor"
			 */
			if (erase_matching(token_start, token_length, "Eight-Core")) {
				state->context_core = token_end;
				return true;
			}
			break;
		case 11:
			/*
			 * Erase "Triple-Core" in AMD processors, e.g.
			 * "AMD Phenom(tm) II N830 Triple-Core Processor"
			 * "AMD Phenom(tm) 8650 Triple-Core Processor"
			 */
			if (erase_matching(token_start, token_length, "Triple-Core")) {
				state->context_core = token_end;
				return true;
			}
			/*
			 * Remember to discard string if it contains "Engineering Sample",
			 * e.g. "AMD Engineering Sample"
			 */
			if (memcmp(token_start, "Engineering", token_length) == 0) {
				state->context_engineering = token_start;
				return true;
			}
			break;
	}
	/* An all-zeros token carries no information: erase it. */
	if (is_zero_number(token_start, token_end)) {
		memset(token_start, ' ', token_length);
		return true;
	}
	/* On some Intel processors the last letter of the name is put before the number,
	 * and an additional space is added, e.g.
	 * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz"
	 * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz"
	 * "Intel(R) Core(TM) i5 CPU M 480 @ 2.67GHz"
	 * We fix this issue, i.e. "X 990" -> "990X", "Q 820" -> "820Q"
	 */
	if (previousState.context_upper_letter != 0) {
		/* A single letter token followed by 2-to-5 digit letter is merged together */
		switch (token_length) {
			case 2:
			case 3:
			case 4:
			case 5:
				if (is_number(token_start, token_end)) {
					/* Load the previous single-letter token */
					const char letter = *previousState.context_upper_letter;
					/* Erase the previous single-letter token */
					*previousState.context_upper_letter = ' ';
					/* Move the current token one position to the left */
					move_token(token_start, token_end, token_start - 1);
					token_start -= 1;
					/*
					 * Add the letter on the end
					 * Note: accessing token_start[-1] is safe because this is not the first token
					 */
					token_end[-1] = letter;
				}
		}
	}
	if (state->frequency_separator != NULL) {
		if (is_model_number(token_start, token_end)) {
			state->parsed_model_number = true;
		}
	}
	if (is_frequency(token_start, token_end)) {
		state->frequency_token = true;
	}
	return true;
}
|
||||
|
||||
/**
 * Converts a raw 48-byte CPUID brand string into a normalized name.
 *
 * Strips vendor prefixes, trademark suffixes, "CPU"/"Processor" filler,
 * core-count words, parenthesized content and the "@ frequency" suffix
 * (via transform_token), then collapses whitespace runs into single spaces.
 *
 * @param raw_name         Raw brand string; need not be NUL-terminated and may
 *                         contain embedded zero bytes.
 * @param normalized_name  Output buffer; receives a NUL-terminated normalized name.
 * @return Length of the normalized name, or 0 when the brand string carries no
 *         usable name (all zeros, engineering sample, or frequency only).
 */
uint32_t cpuinfo_x86_normalize_brand_string(
	const char raw_name[48],
	char normalized_name[48])
{
	normalized_name[0] = '\0';
	char name[48];
	memcpy(name, raw_name, sizeof(name));

	/*
	 * First find the end of the string
	 * Start search from the end because some brand strings contain zeroes in the middle
	 */
	char* name_end = &name[48];
	while (name_end[-1] == '\0') {
		/*
		 * Adjust name_end by 1 position and check that we didn't reach the start of the brand string.
		 * This is possible if all characters are zero.
		 */
		if (--name_end == name) {
			/* All characters are zeros */
			return 0;
		}
	}

	struct parser_state parser_state = { 0 };

	/* Now unify all whitespace characters: replace tabs and '\0' with spaces */
	{
		bool inside_parentheses = false;
		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
			switch (*char_ptr) {
				case '(':
					inside_parentheses = true;
					*char_ptr = ' ';
					break;
				case ')':
					inside_parentheses = false;
					*char_ptr = ' ';
					break;
				case '@':
					parser_state.frequency_separator = char_ptr;
					/* fallthrough: the '@' itself is also replaced with a space */
				case '\0':
				case '\t':
					*char_ptr = ' ';
					break;
				default:
					/* Everything inside parentheses, e.g. "(R)", "(TM)", is erased */
					if (inside_parentheses) {
						*char_ptr = ' ';
					}
			}
		}
	}

	/* Iterate through all tokens and erase redundant parts */
	{
		bool is_token = false;
		char* token_start = name;
		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
			if (*char_ptr == ' ') {
				if (is_token) {
					is_token = false;
					/* A false return truncates the string at the current position */
					if (!transform_token(token_start, char_ptr, &parser_state)) {
						name_end = char_ptr;
						break;
					}
				}
			} else {
				if (!is_token) {
					is_token = true;
					token_start = char_ptr;
				}
			}
		}
		if (is_token) {
			transform_token(token_start, name_end, &parser_state);
		}
	}

	/* If this is an engineering sample, return empty string */
	if (parser_state.engineering_sample) {
		return 0;
	}

	/* Check if there is some string before the frequency separator. */
	if (parser_state.frequency_separator != NULL) {
		if (is_space(name, parser_state.frequency_separator)) {
			/* If only frequency is available, return empty string */
			return 0;
		}
	}

	/* Compact tokens: collapse multiple spacing into one */
	{
		char* output_ptr = normalized_name;
		char* token_start = name;
		bool is_token = false;
		/* Initialized to true so that no space is emitted before the first token */
		bool previous_token_ends_with_dash = true;
		bool current_token_starts_with_dash = false;
		uint32_t token_count = 1;
		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
			const char character = *char_ptr;
			if (character == ' ') {
				if (is_token) {
					is_token = false;
					/* Tokens joined by a dash are not separated by a space */
					if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
						token_count += 1;
						*output_ptr++ = ' ';
					}
					output_ptr = move_token(token_start, char_ptr, output_ptr);
					/* Note: char_ptr[-1] exists because there is a token before this space */
					previous_token_ends_with_dash = (char_ptr[-1] == '-');
				}
			} else {
				if (!is_token) {
					is_token = true;
					token_start = char_ptr;
					current_token_starts_with_dash = (character == '-');
				}
			}
		}
		if (is_token) {
			if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
				token_count += 1;
				*output_ptr++ = ' ';
			}
			output_ptr = move_token(token_start, name_end, output_ptr);
		}
		if (parser_state.frequency_token && token_count <= 1) {
			/* The only remaining part is frequency */
			normalized_name[0] = '\0';
			return 0;
		}
		if (output_ptr < &normalized_name[48]) {
			*output_ptr = '\0';
		} else {
			normalized_name[47] = '\0';
		}
		return (uint32_t) (output_ptr - normalized_name);
	}
}
||||
|
||||
/*
 * Maps cpuinfo_vendor enum values to the vendor prefix used when formatting
 * a package name. Vendors without an entry get NULL via the implicit
 * zero-initialization of designated-initializer gaps.
 */
static const char* vendor_string_map[] = {
	[cpuinfo_vendor_intel] = "Intel",
	[cpuinfo_vendor_amd] = "AMD",
	[cpuinfo_vendor_via] = "VIA",
	[cpuinfo_vendor_hygon] = "Hygon",
	[cpuinfo_vendor_rdc] = "RDC",
	[cpuinfo_vendor_dmp] = "DM&P",
	[cpuinfo_vendor_transmeta] = "Transmeta",
	[cpuinfo_vendor_cyrix] = "Cyrix",
	[cpuinfo_vendor_rise] = "Rise",
	[cpuinfo_vendor_nsc] = "NSC",
	[cpuinfo_vendor_sis] = "SiS",
	[cpuinfo_vendor_nexgen] = "NexGen",
	[cpuinfo_vendor_umc] = "UMC",
};
|
||||
|
||||
uint32_t cpuinfo_x86_format_package_name(
|
||||
enum cpuinfo_vendor vendor,
|
||||
const char normalized_brand_string[48],
|
||||
char package_name[CPUINFO_PACKAGE_NAME_MAX])
|
||||
{
|
||||
if (normalized_brand_string[0] == '\0') {
|
||||
package_name[0] = '\0';
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char* vendor_string = NULL;
|
||||
if ((uint32_t) vendor < (uint32_t) CPUINFO_COUNT_OF(vendor_string_map)) {
|
||||
vendor_string = vendor_string_map[(uint32_t) vendor];
|
||||
}
|
||||
if (vendor_string == NULL) {
|
||||
strncpy(package_name, normalized_brand_string, CPUINFO_PACKAGE_NAME_MAX);
|
||||
package_name[CPUINFO_PACKAGE_NAME_MAX - 1] = '\0';
|
||||
return 0;
|
||||
} else {
|
||||
snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX,
|
||||
"%s %s", vendor_string, normalized_brand_string);
|
||||
return (uint32_t) strlen(vendor_string) + 1;
|
||||
}
|
||||
}
|
127
dep/cpuinfo/src/x86/topology.c
Normal file
127
dep/cpuinfo/src/x86/topology.c
Normal file
@ -0,0 +1,127 @@
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <cpuinfo/utils.h>
|
||||
#include <cpuinfo/log.h>
|
||||
#include <x86/api.h>
|
||||
#include <x86/cpuid.h>
|
||||
|
||||
|
||||
/*
 * Topology level types reported in ecx[bits 8-15] of CPUID leaf 0xB
 * (extended topology enumeration); type 0 terminates the enumeration.
 */
enum topology_type {
	topology_type_invalid = 0,
	topology_type_smt = 1,
	topology_type_core = 2,
};
|
||||
|
||||
/**
 * Detects the APIC ID of the processor executing this code and the bit-field
 * layout (offset/length of the SMT and core fields) within the APIC ID.
 *
 * Uses basic leaf 1 (HTT flag, initial APIC ID), AMD extended leaves
 * 0x80000001/0x80000008 (CmpLegacy flag, core count), and, when x2APIC is
 * supported, leaf 0xB extended topology enumeration, which overrides the
 * legacy-derived layout.
 *
 * @param max_base_index     Maximum supported basic CPUID leaf.
 * @param max_extended_index Maximum supported extended CPUID leaf.
 * @param leaf1              Pre-read register values of CPUID leaf 1.
 * @param[out] topology      Receives the APIC ID and bit-field layout.
 */
void cpuinfo_x86_detect_topology(
	uint32_t max_base_index,
	uint32_t max_extended_index,
	struct cpuid_regs leaf1,
	struct cpuinfo_x86_topology* topology)
{
	/*
	 * HTT: indicates multi-core/hyper-threading support on this core.
	 * - Intel, AMD: edx[bit 28] in basic info.
	 */
	const bool htt = !!(leaf1.edx & UINT32_C(0x10000000));

	uint32_t apic_id = 0;
	if (htt) {
		/* Initial APIC ID: ebx[bits 24-31] of leaf 1. */
		apic_id = leaf1.ebx >> 24;
		bool amd_cmp_legacy = false;
		if (max_extended_index >= UINT32_C(0x80000001)) {
			const struct cpuid_regs leaf0x80000001 = cpuid(UINT32_C(0x80000001));
			/*
			 * CmpLegacy: core multi-processing legacy mode.
			 * - AMD: ecx[bit 1] in extended info (reserved bit on Intel CPUs).
			 */
			amd_cmp_legacy = !!(leaf0x80000001.ecx & UINT32_C(0x00000002));
		}
		if (amd_cmp_legacy) {
			if (max_extended_index >= UINT32_C(0x80000008)) {
				const struct cpuid_regs leaf0x80000008 = cpuid(UINT32_C(0x80000008));
				/*
				 * NC: number of physical cores - 1. The number of cores in the processor is NC+1.
				 * - AMD: ecx[bits 0-7] in leaf 0x80000008 (reserved zero bits on Intel CPUs).
				 */
				const uint32_t cores_per_processor = 1 + (leaf0x80000008.ecx & UINT32_C(0x000000FF));
				topology->core_bits_length = bit_length(cores_per_processor);
				cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", cores per processor = %"PRIu32, apic_id, cores_per_processor);
			} else {
				/*
				 * LogicalProcessorCount: the number of cores per processor.
				 * - AMD: ebx[bits 16-23] in basic info (different interpretation on Intel CPUs).
				 */
				const uint32_t cores_per_processor = (leaf1.ebx >> 16) & UINT32_C(0x000000FF);
				if (cores_per_processor != 0) {
					topology->core_bits_length = bit_length(cores_per_processor);
				}
				cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", cores per processor = %"PRIu32, apic_id, cores_per_processor);
			}
		} else {
			/*
			 * Maximum number of addressable IDs for logical processors in this physical package.
			 * - Intel: ebx[bits 16-23] in basic info (different interpretation on AMD CPUs).
			 */
			const uint32_t logical_processors = (leaf1.ebx >> 16) & UINT32_C(0x000000FF);
			if (logical_processors != 0) {
				const uint32_t log2_max_logical_processors = bit_length(logical_processors);
				const uint32_t log2_max_threads_per_core = log2_max_logical_processors - topology->core_bits_length;
				topology->core_bits_offset = log2_max_threads_per_core;
				topology->thread_bits_length = log2_max_threads_per_core;
			}
			cpuinfo_log_debug("HTT: APIC ID = %08"PRIx32", logical processors = %"PRIu32, apic_id, logical_processors);
		}
	}

	/*
	 * x2APIC: indicated support for x2APIC feature.
	 * - Intel: ecx[bit 21] in basic info (reserved bit on AMD CPUs).
	 */
	const bool x2apic = !!(leaf1.ecx & UINT32_C(0x00200000));
	if (x2apic && (max_base_index >= UINT32_C(0xB))) {
		uint32_t level = 0;
		uint32_t type;
		uint32_t total_shift = 0;
		/* Leaf 0xB data supersedes any layout derived above: start from zero. */
		topology->thread_bits_offset = topology->thread_bits_length = 0;
		topology->core_bits_offset = topology->core_bits_length = 0;
		do {
			const struct cpuid_regs leafB = cpuidex(UINT32_C(0xB), level);
			type = (leafB.ecx >> 8) & UINT32_C(0x000000FF);
			const uint32_t level_shift = leafB.eax & UINT32_C(0x0000001F);
			const uint32_t x2apic_id = leafB.edx;
			apic_id = x2apic_id;
			switch (type) {
				case topology_type_invalid:
					break;
				case topology_type_smt:
					cpuinfo_log_debug("x2 level %"PRIu32": APIC ID = %08"PRIx32", "
						"type SMT, shift %"PRIu32", total shift %"PRIu32,
						level, apic_id, level_shift, total_shift);
					topology->thread_bits_offset = total_shift;
					topology->thread_bits_length = level_shift;
					break;
				case topology_type_core:
					cpuinfo_log_debug("x2 level %"PRIu32": APIC ID = %08"PRIx32", "
						"type core, shift %"PRIu32", total shift %"PRIu32,
						level, apic_id, level_shift, total_shift);
					topology->core_bits_offset = total_shift;
					topology->core_bits_length = level_shift;
					break;
				default:
					cpuinfo_log_warning("unexpected topology type %"PRIu32" (offset %"PRIu32", length %"PRIu32") "
						"reported in leaf 0x0000000B is ignored", type, total_shift, level_shift);
					break;
			}
			total_shift += level_shift;
			level += 1;
		} while (type != 0);
		cpuinfo_log_debug("x2APIC ID 0x%08"PRIx32", "
			"SMT offset %"PRIu32" length %"PRIu32", core offset %"PRIu32" length %"PRIu32, apic_id,
			topology->thread_bits_offset, topology->thread_bits_length,
			topology->core_bits_offset, topology->core_bits_length);
	}

	topology->apic_id = apic_id;
}
|
241
dep/cpuinfo/src/x86/uarch.c
Normal file
241
dep/cpuinfo/src/x86/uarch.c
Normal file
@ -0,0 +1,241 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
|
||||
|
||||
enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
|
||||
enum cpuinfo_vendor vendor,
|
||||
const struct cpuinfo_x86_model_info* model_info)
|
||||
{
|
||||
switch (vendor) {
|
||||
case cpuinfo_vendor_intel:
|
||||
switch (model_info->family) {
|
||||
#if CPUINFO_ARCH_X86
|
||||
case 0x05:
|
||||
switch (model_info->model) {
|
||||
case 0x01: // Pentium (60, 66)
|
||||
case 0x02: // Pentium (75, 90, 100, 120, 133, 150, 166, 200)
|
||||
case 0x03: // Pentium OverDrive for Intel486-based systems
|
||||
case 0x04: // Pentium MMX
|
||||
return cpuinfo_uarch_p5;
|
||||
case 0x09:
|
||||
return cpuinfo_uarch_quark;
|
||||
}
|
||||
break;
|
||||
#endif /* CPUINFO_ARCH_X86 */
|
||||
case 0x06:
|
||||
switch (model_info->model) {
|
||||
/* Mainstream cores */
|
||||
#if CPUINFO_ARCH_X86
|
||||
case 0x01: // Pentium Pro
|
||||
case 0x03: // Pentium II (Klamath) and Pentium II Overdrive
|
||||
case 0x05: // Pentium II (Deschutes, Tonga), Pentium II Celeron (Covington), Pentium II Xeon (Drake)
|
||||
case 0x06: // Pentium II (Dixon), Pentium II Celeron (Mendocino)
|
||||
case 0x07: // Pentium III (Katmai), Pentium III Xeon (Tanner)
|
||||
case 0x08: // Pentium III (Coppermine), Pentium II Celeron (Coppermine-128), Pentium III Xeon (Cascades)
|
||||
case 0x0A: // Pentium III Xeon (Cascades-2MB)
|
||||
case 0x0B: // Pentium III (Tualatin), Pentium III Celeron (Tualatin-256)
|
||||
return cpuinfo_uarch_p6;
|
||||
case 0x09: // Pentium M (Banias), Pentium M Celeron (Banias-0, Banias-512)
|
||||
case 0x0D: // Pentium M (Dothan), Pentium M Celeron (Dothan-512, Dothan-1024)
|
||||
case 0x15: // Intel 80579 (Tolapai)
|
||||
return cpuinfo_uarch_dothan;
|
||||
case 0x0E: // Core Solo/Duo (Yonah), Pentium Dual-Core T2xxx (Yonah), Celeron M (Yonah-512, Yonah-1024), Dual-Core Xeon (Sossaman)
|
||||
return cpuinfo_uarch_yonah;
|
||||
#endif /* CPUINFO_ARCH_X86 */
|
||||
case 0x0F: // Core 2 Duo (Conroe, Conroe-2M, Merom), Core 2 Quad (Tigerton), Xeon (Woodcrest, Clovertown, Kentsfield)
|
||||
case 0x16: // Celeron (Conroe-L, Merom-L), Core 2 Duo (Merom)
|
||||
return cpuinfo_uarch_conroe;
|
||||
case 0x17: // Core 2 Duo (Penryn-3M), Core 2 Quad (Yorkfield), Core 2 Extreme (Yorkfield), Xeon (Harpertown), Pentium Dual-Core (Penryn)
|
||||
case 0x1D: // Xeon (Dunnington)
|
||||
return cpuinfo_uarch_penryn;
|
||||
case 0x1A: // Core iX (Bloomfield), Xeon (Gainestown)
|
||||
case 0x1E: // Core iX (Lynnfield, Clarksfield)
|
||||
case 0x1F: // Core iX (Havendale)
|
||||
case 0x2E: // Xeon (Beckton)
|
||||
case 0x25: // Core iX (Clarkdale)
|
||||
case 0x2C: // Core iX (Gulftown), Xeon (Gulftown)
|
||||
case 0x2F: // Xeon (Eagleton)
|
||||
return cpuinfo_uarch_nehalem;
|
||||
case 0x2A: // Core iX (Sandy Bridge)
|
||||
case 0x2D: // Core iX (Sandy Bridge-E), Xeon (Sandy Bridge EP/EX)
|
||||
return cpuinfo_uarch_sandy_bridge;
|
||||
case 0x3A: // Core iX (Ivy Bridge)
|
||||
case 0x3E: // Ivy Bridge-E
|
||||
return cpuinfo_uarch_ivy_bridge;
|
||||
case 0x3C:
|
||||
case 0x3F: // Haswell-E
|
||||
case 0x45: // Haswell ULT
|
||||
case 0x46: // Haswell with eDRAM
|
||||
return cpuinfo_uarch_haswell;
|
||||
case 0x3D: // Broadwell-U
|
||||
case 0x47: // Broadwell-H
|
||||
case 0x4F: // Broadwell-E
|
||||
case 0x56: // Broadwell-DE
|
||||
return cpuinfo_uarch_broadwell;
|
||||
case 0x4E: // Sky Lake Client Y/U
|
||||
case 0x55: // Sky/Cascade/Cooper Lake Server
|
||||
case 0x5E: // Sky Lake Client DT/H/S
|
||||
case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
|
||||
case 0x9E: // Kaby/Coffee Lake DT/H/S
|
||||
case 0xA5: // Comet Lake H/S
|
||||
case 0xA6: // Comet Lake U/Y
|
||||
return cpuinfo_uarch_sky_lake;
|
||||
case 0x66: // Cannon Lake (Core i3-8121U)
|
||||
return cpuinfo_uarch_palm_cove;
|
||||
case 0x6A: // Ice Lake-DE
|
||||
case 0x6C: // Ice Lake-SP
|
||||
case 0x7D: // Ice Lake-Y
|
||||
case 0x7E: // Ice Lake-U
|
||||
return cpuinfo_uarch_sunny_cove;
|
||||
|
||||
/* Low-power cores */
|
||||
case 0x1C: // Diamondville, Silverthorne, Pineview
|
||||
case 0x26: // Tunnel Creek
|
||||
return cpuinfo_uarch_bonnell;
|
||||
case 0x27: // Medfield
|
||||
case 0x35: // Cloverview
|
||||
case 0x36: // Cedarview, Centerton
|
||||
return cpuinfo_uarch_saltwell;
|
||||
case 0x37: // Bay Trail
|
||||
case 0x4A: // Merrifield
|
||||
case 0x4D: // Avoton, Rangeley
|
||||
case 0x5A: // Moorefield
|
||||
case 0x5D: // SoFIA
|
||||
return cpuinfo_uarch_silvermont;
|
||||
case 0x4C: // Braswell, Cherry Trail
|
||||
case 0x75: // Spreadtrum SC9853I-IA
|
||||
return cpuinfo_uarch_airmont;
|
||||
case 0x5C: // Apollo Lake
|
||||
case 0x5F: // Denverton
|
||||
return cpuinfo_uarch_goldmont;
|
||||
case 0x7A: // Gemini Lake
|
||||
return cpuinfo_uarch_goldmont_plus;
|
||||
|
||||
/* Knights-series cores */
|
||||
case 0x57:
|
||||
return cpuinfo_uarch_knights_landing;
|
||||
case 0x85:
|
||||
return cpuinfo_uarch_knights_mill;
|
||||
}
|
||||
break;
|
||||
case 0x0F:
|
||||
switch (model_info->model) {
|
||||
case 0x00: // Pentium 4 Xeon (Foster)
|
||||
case 0x01: // Pentium 4 Celeron (Willamette-128), Pentium 4 Xeon (Foster, Foster MP)
|
||||
case 0x02: // Pentium 4 (Northwood), Pentium 4 EE (Gallatin), Pentium 4 Celeron (Northwood-128, Northwood-256), Pentium 4 Xeon (Gallatin DP, Prestonia)
|
||||
return cpuinfo_uarch_willamette;
|
||||
break;
|
||||
case 0x03: // Pentium 4 (Prescott), Pentium 4 Xeon (Nocona)
|
||||
case 0x04: // Pentium 4 (Prescott-2M), Pentium 4 EE (Prescott-2M), Pentium D (Smithfield), Celeron D (Prescott-256), Pentium 4 Xeon (Cranford, Irwindale, Paxville)
|
||||
case 0x06: // Pentium 4 (Cedar Mill), Pentium D EE (Presler), Celeron D (Cedar Mill), Pentium 4 Xeon (Dempsey, Tulsa)
|
||||
return cpuinfo_uarch_prescott;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case cpuinfo_vendor_amd:
|
||||
switch (model_info->family) {
|
||||
#if CPUINFO_ARCH_X86
|
||||
case 0x5:
|
||||
switch (model_info->model) {
|
||||
case 0x00:
|
||||
case 0x01:
|
||||
case 0x02:
|
||||
return cpuinfo_uarch_k5;
|
||||
case 0x06:
|
||||
case 0x07:
|
||||
case 0x08:
|
||||
case 0x0D:
|
||||
return cpuinfo_uarch_k6;
|
||||
case 0x0A:
|
||||
return cpuinfo_uarch_geode;
|
||||
}
|
||||
break;
|
||||
case 0x6:
|
||||
return cpuinfo_uarch_k7;
|
||||
#endif /* CPUINFO_ARCH_X86 */
|
||||
case 0xF: // Opteron, Athlon 64, Sempron
|
||||
case 0x11: // Turion
|
||||
return cpuinfo_uarch_k8;
|
||||
case 0x10: // Opteron, Phenom, Athlon, Sempron
|
||||
case 0x12: // Llano APU
|
||||
return cpuinfo_uarch_k10;
|
||||
case 0x14:
|
||||
return cpuinfo_uarch_bobcat;
|
||||
case 0x15:
|
||||
switch (model_info->model) {
|
||||
case 0x00: // Engineering samples
|
||||
case 0x01: // Zambezi, Interlagos
|
||||
return cpuinfo_uarch_bulldozer;
|
||||
case 0x02: // Vishera
|
||||
case 0x10: // Trinity
|
||||
case 0x13: // Richland
|
||||
return cpuinfo_uarch_piledriver;
|
||||
case 0x38: // Godavari
|
||||
case 0x30: // Kaveri
|
||||
return cpuinfo_uarch_steamroller;
|
||||
case 0x60: // Carrizo
|
||||
case 0x65: // Bristol Ridge
|
||||
case 0x70: // Stoney Ridge
|
||||
return cpuinfo_uarch_excavator;
|
||||
default:
|
||||
switch (model_info->extended_model) {
|
||||
case 0x0:
|
||||
return cpuinfo_uarch_bulldozer;
|
||||
case 0x1: // No L3 cache
|
||||
case 0x2: // With L3 cache
|
||||
return cpuinfo_uarch_piledriver;
|
||||
case 0x3: // With L3 cache
|
||||
case 0x4: // No L3 cache
|
||||
return cpuinfo_uarch_steamroller;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x16:
|
||||
if (model_info->model >= 0x03) {
|
||||
return cpuinfo_uarch_puma;
|
||||
} else {
|
||||
return cpuinfo_uarch_jaguar;
|
||||
}
|
||||
case 0x17:
|
||||
switch (model_info->model) {
|
||||
case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
|
||||
case 0x08: // 12 nm Pinnacle Ridge
|
||||
case 0x11: // 14 nm Raven Ridge, Great Horned Owl
|
||||
case 0x18: // 12 nm Picasso
|
||||
return cpuinfo_uarch_zen;
|
||||
case 0x31: // Rome, Castle Peak
|
||||
case 0x60: // Renoir
|
||||
case 0x68: // Lucienne
|
||||
case 0x71: // Matisse
|
||||
case 0x90: // Van Gogh
|
||||
case 0x98: // Mero
|
||||
return cpuinfo_uarch_zen2;
|
||||
}
|
||||
break;
|
||||
case 0x19:
|
||||
switch (model_info->model) {
|
||||
case 0x01: // Genesis
|
||||
case 0x21: // Vermeer
|
||||
case 0x30: // Badami, Trento
|
||||
case 0x40: // Rembrandt
|
||||
case 0x50: // Cezanne
|
||||
return cpuinfo_uarch_zen3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case cpuinfo_vendor_hygon:
|
||||
switch (model_info->family) {
|
||||
case 0x00:
|
||||
return cpuinfo_uarch_dhyana;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return cpuinfo_uarch_unknown;
|
||||
}
|
189
dep/cpuinfo/src/x86/vendor.c
Normal file
189
dep/cpuinfo/src/x86/vendor.c
Normal file
@ -0,0 +1,189 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
|
||||
|
||||
/*
 * CPUID leaf 0 returns the 12-character vendor identification string in
 * registers EBX, EDX, ECX (in that order).  Each macro below packs 4 ASCII
 * characters into a little-endian 32-bit word, so matching a vendor string
 * reduces to three integer comparisons in cpuinfo_x86_decode_vendor.
 */

/* Intel vendor string: "GenuineIntel" */
#define Genu UINT32_C(0x756E6547)
#define ineI UINT32_C(0x49656E69)
#define ntel UINT32_C(0x6C65746E)

/* AMD vendor strings: "AuthenticAMD", "AMDisbetter!", "AMD ISBETTER" */
#define Auth UINT32_C(0x68747541)
#define enti UINT32_C(0x69746E65)
#define cAMD UINT32_C(0x444D4163)
#define AMDi UINT32_C(0x69444D41)
#define sbet UINT32_C(0x74656273)
#define ter UINT32_C(0x21726574)
#define AMD UINT32_C(0x20444D41)
#define ISBE UINT32_C(0x45425349)
#define TTER UINT32_C(0x52455454)

/* VIA (Centaur) vendor strings: "CentaurHauls", "VIA VIA VIA " */
#define Cent UINT32_C(0x746E6543)
#define aurH UINT32_C(0x48727561)
#define auls UINT32_C(0x736C7561)
#define VIA UINT32_C(0x20414956)

/* Hygon vendor string: "HygonGenuine" */
#define Hygo UINT32_C(0x6F677948)
#define nGen UINT32_C(0x6E65476E)
#define uine UINT32_C(0x656E6975)

/* Transmeta vendor strings: "GenuineTMx86", "TransmetaCPU" */
#define ineT UINT32_C(0x54656E69)
#define Mx86 UINT32_C(0x3638784D)
#define Tran UINT32_C(0x6E617254)
#define smet UINT32_C(0x74656D73)
#define aCPU UINT32_C(0x55504361)

/* Cyrix vendor string: "CyrixInstead" */
#define Cyri UINT32_C(0x69727943)
#define xIns UINT32_C(0x736E4978)
#define tead UINT32_C(0x64616574)

/* Rise vendor string: "RiseRiseRise" (same word in all three registers) */
#define Rise UINT32_C(0x65736952)

/* NSC vendor string: "Geode by NSC" */
#define Geod UINT32_C(0x646F6547)
#define e_by UINT32_C(0x79622065)
#define NSC UINT32_C(0x43534E20)

/* SiS vendor string: "SiS SiS SiS " (same word in all three registers) */
#define SiS UINT32_C(0x20536953)

/* NexGen vendor string: "NexGenDriven" */
#define NexG UINT32_C(0x4778654E)
#define enDr UINT32_C(0x72446E65)
#define iven UINT32_C(0x6E657669)

/* UMC vendor string: "UMC UMC UMC " (same word in all three registers) */
#define UMC UINT32_C(0x20434D55)

/* RDC vendor string: "Genuine RDC" (shares the "Genu" prefix with Intel/Transmeta) */
#define ine UINT32_C(0x20656E69)
#define RDC UINT32_C(0x43445220)

/* D&MP vendor string: "Vortex86 SoC" */
#define Vort UINT32_C(0x74726F56)
#define ex86 UINT32_C(0x36387865)
#define SoC UINT32_C(0x436F5320)
||||
|
||||
|
||||
enum cpuinfo_vendor cpuinfo_x86_decode_vendor(uint32_t ebx, uint32_t ecx, uint32_t edx) {
|
||||
switch (ebx) {
|
||||
case Genu:
|
||||
switch (edx) {
|
||||
case ineI:
|
||||
if (ecx == ntel) {
|
||||
/* "GenuineIntel" */
|
||||
return cpuinfo_vendor_intel;
|
||||
}
|
||||
break;
|
||||
#if CPUINFO_ARCH_X86
|
||||
case ineT:
|
||||
if (ecx == Mx86) {
|
||||
/* "GenuineTMx86" */
|
||||
return cpuinfo_vendor_transmeta;
|
||||
}
|
||||
break;
|
||||
case ine:
|
||||
if (ecx == RDC) {
|
||||
/* "Genuine RDC" */
|
||||
return cpuinfo_vendor_rdc;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case Auth:
|
||||
if (edx == enti && ecx == cAMD) {
|
||||
/* "AuthenticAMD" */
|
||||
return cpuinfo_vendor_amd;
|
||||
}
|
||||
break;
|
||||
case Cent:
|
||||
if (edx == aurH && ecx == auls) {
|
||||
/* "CentaurHauls" */
|
||||
return cpuinfo_vendor_via;
|
||||
}
|
||||
break;
|
||||
case Hygo:
|
||||
if (edx == nGen && ecx == uine) {
|
||||
/* "HygonGenuine" */
|
||||
return cpuinfo_vendor_hygon;
|
||||
}
|
||||
break;
|
||||
#if CPUINFO_ARCH_X86
|
||||
case AMDi:
|
||||
if (edx == sbet && ecx == ter) {
|
||||
/* "AMDisbetter!" */
|
||||
return cpuinfo_vendor_amd;
|
||||
}
|
||||
break;
|
||||
case AMD:
|
||||
if (edx == ISBE && ecx == TTER) {
|
||||
/* "AMD ISBETTER" */
|
||||
return cpuinfo_vendor_amd;
|
||||
}
|
||||
break;
|
||||
case VIA:
|
||||
if (edx == VIA && ecx == VIA) {
|
||||
/* "VIA VIA VIA " */
|
||||
return cpuinfo_vendor_via;
|
||||
}
|
||||
break;
|
||||
case Tran:
|
||||
if (edx == smet && ecx == aCPU) {
|
||||
/* "TransmetaCPU" */
|
||||
return cpuinfo_vendor_transmeta;
|
||||
}
|
||||
break;
|
||||
case Cyri:
|
||||
if (edx == xIns && ecx == tead) {
|
||||
/* "CyrixInstead" */
|
||||
return cpuinfo_vendor_cyrix;
|
||||
}
|
||||
break;
|
||||
case Rise:
|
||||
if (edx == Rise && ecx == Rise) {
|
||||
/* "RiseRiseRise" */
|
||||
return cpuinfo_vendor_rise;
|
||||
}
|
||||
break;
|
||||
case Geod:
|
||||
if (edx == e_by && ecx == NSC) {
|
||||
/* "Geode by NSC" */
|
||||
return cpuinfo_vendor_nsc;
|
||||
}
|
||||
break;
|
||||
case SiS:
|
||||
if (edx == SiS && ecx == SiS) {
|
||||
/* "SiS SiS SiS " */
|
||||
return cpuinfo_vendor_sis;
|
||||
}
|
||||
break;
|
||||
case NexG:
|
||||
if (edx == enDr && ecx == iven) {
|
||||
/* "NexGenDriven" */
|
||||
return cpuinfo_vendor_nexgen;
|
||||
}
|
||||
break;
|
||||
case UMC:
|
||||
if (edx == UMC && ecx == UMC) {
|
||||
/* "UMC UMC UMC " */
|
||||
return cpuinfo_vendor_umc;
|
||||
}
|
||||
break;
|
||||
case Vort:
|
||||
if (edx == ex86 && ecx == SoC) {
|
||||
/* "Vortex86 SoC" */
|
||||
return cpuinfo_vendor_dmp;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
return cpuinfo_vendor_unknown;
|
||||
}
|
41
dep/cpuinfo/src/x86/windows/api.h
Normal file
41
dep/cpuinfo/src/x86/windows/api.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <windows.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
|
||||
/*
 * Per-logical-processor bookkeeping record.
 *
 * NOTE(review): the struct is named cpuinfo_arm_linux_processor and its
 * frequency fields are documented against Linux sysfs paths, yet it appears
 * in the x86 Windows API header -- presumably copied from the ARM/Linux
 * port; confirm which fields are actually populated on this platform.
 */
struct cpuinfo_arm_linux_processor {
	/**
	 * Minimum processor ID on the package which includes this logical processor.
	 * This value can serve as an ID for the cluster of logical processors: it is the
	 * same for all logical processors on the same package.
	 */
	uint32_t package_leader_id;
	/**
	 * NOTE(review): stale documentation block -- it described the minimum
	 * processor ID on the core, but the corresponding member is absent here;
	 * either the field was removed or the comment was left behind.
	 */
	/**
	 * Number of logical processors in the package.
	 */
	uint32_t package_processor_count;
	/**
	 * Maximum frequency, in kHZ.
	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_max_freq
	 * If failed to read or parse the file, the value is 0.
	 */
	uint32_t max_frequency;
	/**
	 * Minimum frequency, in kHZ.
	 * The value is parsed from /sys/devices/system/cpu/cpu<N>/cpufreq/cpuinfo_min_freq
	 * If failed to read or parse the file, the value is 0.
	 */
	uint32_t min_frequency;
	/** Linux processor ID */
	uint32_t system_processor_id;
	/* Bitfield of per-processor status flags; bit semantics defined elsewhere -- TODO confirm */
	uint32_t flags;
};
|
634
dep/cpuinfo/src/x86/windows/init.c
Normal file
634
dep/cpuinfo/src/x86/windows/init.c
Normal file
@ -0,0 +1,634 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cpuinfo.h>
|
||||
#include <x86/api.h>
|
||||
#include <cpuinfo/internal-api.h>
|
||||
#include <cpuinfo/log.h>
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define CPUINFO_ALLOCA __builtin_alloca
|
||||
#else
|
||||
#define CPUINFO_ALLOCA _alloca
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Build a mask with the low @p bits bits set, e.g. bit_mask(4) == 0xF.
 *
 * A left shift by 32 or more is undefined behavior in C, so widths of 32+
 * bits are explicitly clamped to an all-ones mask instead of relying on UB.
 */
static inline uint32_t bit_mask(uint32_t bits) {
	if (bits >= 32) {
		return UINT32_MAX;
	}
	return (UINT32_C(1) << bits) - UINT32_C(1);
}
|
||||
|
||||
/*
 * Index of the lowest set bit in a processor-group affinity mask.
 *
 * KAFFINITY is a machine-word-wide bitmask (64-bit on x64, 32-bit on x86),
 * so the matching MSVC _BitScanForward intrinsic is selected per target.
 *
 * NOTE(review): when kaffinity == 0, _BitScanForward leaves `index`
 * unmodified and the return value is garbage -- callers must pass a
 * non-zero mask.  Verify all call sites guard against an empty mask.
 */
static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
#if defined(_M_X64) || defined(_M_AMD64)
	unsigned long index;
	_BitScanForward64(&index, (unsigned __int64) kaffinity);
	return (uint32_t) index;
#elif defined(_M_IX86)
	unsigned long index;
	_BitScanForward(&index, (unsigned long) kaffinity);
	return (uint32_t) index;
#else
	#error Platform-specific implementation required
#endif
}
|
||||
|
||||
static void cpuinfo_x86_count_caches(
|
||||
uint32_t processors_count,
|
||||
const struct cpuinfo_processor* processors,
|
||||
const struct cpuinfo_x86_processor* x86_processor,
|
||||
uint32_t* l1i_count_ptr,
|
||||
uint32_t* l1d_count_ptr,
|
||||
uint32_t* l2_count_ptr,
|
||||
uint32_t* l3_count_ptr,
|
||||
uint32_t* l4_count_ptr)
|
||||
{
|
||||
uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
|
||||
uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
|
||||
uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < processors_count; i++) {
|
||||
const uint32_t apic_id = processors[i].apic_id;
|
||||
cpuinfo_log_debug("APID ID %"PRIu32": logical processor %"PRIu32, apic_id, i);
|
||||
|
||||
if (x86_processor->cache.l1i.size != 0) {
|
||||
const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor->cache.l1i.apic_bits);
|
||||
if (l1i_id != last_l1i_id) {
|
||||
last_l1i_id = l1i_id;
|
||||
l1i_count++;
|
||||
}
|
||||
}
|
||||
if (x86_processor->cache.l1d.size != 0) {
|
||||
const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor->cache.l1d.apic_bits);
|
||||
if (l1d_id != last_l1d_id) {
|
||||
last_l1d_id = l1d_id;
|
||||
l1d_count++;
|
||||
}
|
||||
}
|
||||
if (x86_processor->cache.l2.size != 0) {
|
||||
const uint32_t l2_id = apic_id & ~bit_mask(x86_processor->cache.l2.apic_bits);
|
||||
if (l2_id != last_l2_id) {
|
||||
last_l2_id = l2_id;
|
||||
l2_count++;
|
||||
}
|
||||
}
|
||||
if (x86_processor->cache.l3.size != 0) {
|
||||
const uint32_t l3_id = apic_id & ~bit_mask(x86_processor->cache.l3.apic_bits);
|
||||
if (l3_id != last_l3_id) {
|
||||
last_l3_id = l3_id;
|
||||
l3_count++;
|
||||
}
|
||||
}
|
||||
if (x86_processor->cache.l4.size != 0) {
|
||||
const uint32_t l4_id = apic_id & ~bit_mask(x86_processor->cache.l4.apic_bits);
|
||||
if (l4_id != last_l4_id) {
|
||||
last_l4_id = l4_id;
|
||||
l4_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
*l1i_count_ptr = l1i_count;
|
||||
*l1d_count_ptr = l1d_count;
|
||||
*l2_count_ptr = l2_count;
|
||||
*l3_count_ptr = l3_count;
|
||||
*l4_count_ptr = l4_count;
|
||||
}
|
||||
|
||||
static bool cpuinfo_x86_windows_is_wine(void) {
|
||||
HMODULE ntdll = GetModuleHandleW(L"ntdll.dll");
|
||||
if (ntdll == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return GetProcAddress(ntdll, "wine_get_version") != NULL;
|
||||
}
|
||||
|
||||
BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
|
||||
struct cpuinfo_processor* processors = NULL;
|
||||
struct cpuinfo_core* cores = NULL;
|
||||
struct cpuinfo_cluster* clusters = NULL;
|
||||
struct cpuinfo_package* packages = NULL;
|
||||
struct cpuinfo_cache* l1i = NULL;
|
||||
struct cpuinfo_cache* l1d = NULL;
|
||||
struct cpuinfo_cache* l2 = NULL;
|
||||
struct cpuinfo_cache* l3 = NULL;
|
||||
struct cpuinfo_cache* l4 = NULL;
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX processor_infos = NULL;
|
||||
|
||||
HANDLE heap = GetProcessHeap();
|
||||
const bool is_wine = cpuinfo_x86_windows_is_wine();
|
||||
|
||||
struct cpuinfo_x86_processor x86_processor;
|
||||
ZeroMemory(&x86_processor, sizeof(x86_processor));
|
||||
cpuinfo_x86_init_processor(&x86_processor);
|
||||
char brand_string[48];
|
||||
cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
|
||||
|
||||
const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
|
||||
const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
|
||||
const uint32_t package_bits_offset = max(
|
||||
x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
|
||||
x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
|
||||
|
||||
/* WINE doesn't implement GetMaximumProcessorGroupCount and aborts when calling it */
|
||||
const uint32_t max_group_count = is_wine ? 1 : (uint32_t) GetMaximumProcessorGroupCount();
|
||||
cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count);
|
||||
|
||||
uint32_t processors_count = 0;
|
||||
uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
|
||||
for (uint32_t i = 0; i < max_group_count; i++) {
|
||||
processors_per_group[i] = GetMaximumProcessorCount((WORD) i);
|
||||
cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32,
|
||||
processors_per_group[i], i);
|
||||
processors_count += processors_per_group[i];
|
||||
}
|
||||
|
||||
uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
|
||||
for (uint32_t i = 0, count = 0; i < max_group_count; i++) {
|
||||
processors_before_group[i] = count;
|
||||
cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32,
|
||||
processors_before_group[i], i);
|
||||
count += processors_per_group[i];
|
||||
}
|
||||
|
||||
processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, processors_count * sizeof(struct cpuinfo_processor));
|
||||
if (processors == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
|
||||
processors_count * sizeof(struct cpuinfo_processor), processors_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
DWORD cores_info_size = 0;
|
||||
if (GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &cores_info_size) == FALSE) {
|
||||
const DWORD last_error = GetLastError();
|
||||
if (last_error != ERROR_INSUFFICIENT_BUFFER) {
|
||||
cpuinfo_log_error("failed to query size of processor cores information: error %"PRIu32,
|
||||
(uint32_t) last_error);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
DWORD packages_info_size = 0;
|
||||
if (GetLogicalProcessorInformationEx(RelationProcessorPackage, NULL, &packages_info_size) == FALSE) {
|
||||
const DWORD last_error = GetLastError();
|
||||
if (last_error != ERROR_INSUFFICIENT_BUFFER) {
|
||||
cpuinfo_log_error("failed to query size of processor packages information: error %"PRIu32,
|
||||
(uint32_t) last_error);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
DWORD max_info_size = max(cores_info_size, packages_info_size);
|
||||
|
||||
processor_infos = HeapAlloc(heap, 0, max_info_size);
|
||||
if (processor_infos == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information",
|
||||
(uint32_t) max_info_size);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (GetLogicalProcessorInformationEx(RelationProcessorPackage, processor_infos, &max_info_size) == FALSE) {
|
||||
cpuinfo_log_error("failed to query processor packages information: error %"PRIu32,
|
||||
(uint32_t) GetLastError());
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
uint32_t packages_count = 0;
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX packages_info_end =
|
||||
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + packages_info_size);
|
||||
for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX package_info = processor_infos;
|
||||
package_info < packages_info_end;
|
||||
package_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) package_info + package_info->Size))
|
||||
{
|
||||
if (package_info->Relationship != RelationProcessorPackage) {
|
||||
cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor package information",
|
||||
(uint32_t) package_info->Relationship);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We assume that packages are reported in APIC order */
|
||||
const uint32_t package_id = packages_count++;
|
||||
/* Reconstruct package part of APIC ID */
|
||||
const uint32_t package_apic_id = package_id << package_bits_offset;
|
||||
/* Iterate processor groups and set the package part of APIC ID */
|
||||
for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) {
|
||||
const uint32_t group_id = package_info->Processor.GroupMask[i].Group;
|
||||
/* Global index of the first logical processor belonging to this group */
|
||||
const uint32_t group_processors_start = processors_before_group[group_id];
|
||||
/* Bitmask representing processors in this group belonging to this package */
|
||||
KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask;
|
||||
while (group_processors_mask != 0) {
|
||||
const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask);
|
||||
const uint32_t processor_id = group_processors_start + group_processor_id;
|
||||
processors[processor_id].package = (const struct cpuinfo_package*) NULL + package_id;
|
||||
processors[processor_id].windows_group_id = (uint16_t) group_id;
|
||||
processors[processor_id].windows_processor_id = (uint16_t) group_processor_id;
|
||||
processors[processor_id].apic_id = package_apic_id;
|
||||
|
||||
/* Reset the lowest bit in affinity mask */
|
||||
group_processors_mask &= (group_processors_mask - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
max_info_size = max(cores_info_size, packages_info_size);
|
||||
if (GetLogicalProcessorInformationEx(RelationProcessorCore, processor_infos, &max_info_size) == FALSE) {
|
||||
cpuinfo_log_error("failed to query processor cores information: error %"PRIu32,
|
||||
(uint32_t) GetLastError());
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
uint32_t cores_count = 0;
|
||||
/* Index (among all cores) of the the first core on the current package */
|
||||
uint32_t package_core_start = 0;
|
||||
uint32_t current_package_apic_id = 0;
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX cores_info_end =
|
||||
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + cores_info_size);
|
||||
for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info = processor_infos;
|
||||
core_info < cores_info_end;
|
||||
core_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) core_info + core_info->Size))
|
||||
{
|
||||
if (core_info->Relationship != RelationProcessorCore) {
|
||||
cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor core information",
|
||||
(uint32_t) core_info->Relationship);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* We assume that cores and logical processors are reported in APIC order */
|
||||
const uint32_t core_id = cores_count++;
|
||||
uint32_t smt_id = 0;
|
||||
/* Reconstruct core part of APIC ID */
|
||||
const uint32_t core_apic_id = (core_id & core_bits_mask) << x86_processor.topology.core_bits_offset;
|
||||
/* Iterate processor groups and set the core & SMT parts of APIC ID */
|
||||
for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) {
|
||||
const uint32_t group_id = core_info->Processor.GroupMask[i].Group;
|
||||
/* Global index of the first logical processor belonging to this group */
|
||||
const uint32_t group_processors_start = processors_before_group[group_id];
|
||||
/* Bitmask representing processors in this group belonging to this package */
|
||||
KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask;
|
||||
while (group_processors_mask != 0) {
|
||||
const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask);
|
||||
const uint32_t processor_id = group_processors_start + group_processor_id;
|
||||
|
||||
/* Check if this is the first core on a new package */
|
||||
if (processors[processor_id].apic_id != current_package_apic_id) {
|
||||
package_core_start = core_id;
|
||||
current_package_apic_id = processors[processor_id].apic_id;
|
||||
}
|
||||
/* Core ID w.r.t package */
|
||||
const uint32_t package_core_id = core_id - package_core_start;
|
||||
|
||||
/* Update APIC ID with core and SMT parts */
|
||||
processors[processor_id].apic_id |=
|
||||
((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
|
||||
((package_core_id & core_bits_mask) << x86_processor.topology.core_bits_offset);
|
||||
cpuinfo_log_debug("reconstructed APIC ID 0x%08"PRIx32" for processor %"PRIu32" in group %"PRIu32,
|
||||
processors[processor_id].apic_id, group_processor_id, group_id);
|
||||
|
||||
/* Set SMT ID (assume logical processors within the core are reported in APIC order) */
|
||||
processors[processor_id].smt_id = smt_id++;
|
||||
processors[processor_id].core = (const struct cpuinfo_core*) NULL + core_id;
|
||||
|
||||
/* Reset the lowest bit in affinity mask */
|
||||
group_processors_mask &= (group_processors_mask - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, cores_count * sizeof(struct cpuinfo_core));
|
||||
if (cores == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
|
||||
cores_count * sizeof(struct cpuinfo_core), cores_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_cluster));
|
||||
if (clusters == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
|
||||
packages_count * sizeof(struct cpuinfo_cluster), packages_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_package));
|
||||
if (packages == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
|
||||
packages_count * sizeof(struct cpuinfo_package), packages_count);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for (uint32_t i = processors_count; i != 0; i--) {
|
||||
const uint32_t processor_id = i - 1;
|
||||
struct cpuinfo_processor* processor = processors + processor_id;
|
||||
|
||||
/* Adjust core and package pointers for all logical processors */
|
||||
struct cpuinfo_core* core =
|
||||
(struct cpuinfo_core*) ((uintptr_t) cores + (uintptr_t) processor->core);
|
||||
processor->core = core;
|
||||
struct cpuinfo_cluster* cluster =
|
||||
(struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster);
|
||||
processor->cluster = cluster;
|
||||
struct cpuinfo_package* package =
|
||||
(struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package);
|
||||
processor->package = package;
|
||||
|
||||
/* This can be overwritten by lower-index processors on the same package */
|
||||
package->processor_start = processor_id;
|
||||
package->processor_count += 1;
|
||||
|
||||
/* This can be overwritten by lower-index processors on the same cluster */
|
||||
cluster->processor_start = processor_id;
|
||||
cluster->processor_count += 1;
|
||||
|
||||
/* This can be overwritten by lower-index processors on the same core*/
|
||||
core->processor_start = processor_id;
|
||||
core->processor_count += 1;
|
||||
}
|
||||
|
||||
/* Set vendor/uarch/CPUID information for cores */
|
||||
for (uint32_t i = cores_count; i != 0; i--) {
|
||||
const uint32_t global_core_id = i - 1;
|
||||
struct cpuinfo_core* core = cores + global_core_id;
|
||||
const struct cpuinfo_processor* processor = processors + core->processor_start;
|
||||
struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package;
|
||||
struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster;
|
||||
|
||||
core->cluster = cluster;
|
||||
core->package = package;
|
||||
core->core_id = core_bits_mask &
|
||||
(processor->apic_id >> x86_processor.topology.core_bits_offset);
|
||||
core->vendor = x86_processor.vendor;
|
||||
core->uarch = x86_processor.uarch;
|
||||
core->cpuid = x86_processor.cpuid;
|
||||
|
||||
/* This can be overwritten by lower-index cores on the same cluster/package */
|
||||
cluster->core_start = global_core_id;
|
||||
cluster->core_count += 1;
|
||||
package->core_start = global_core_id;
|
||||
package->core_count += 1;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < packages_count; i++) {
|
||||
struct cpuinfo_package* package = packages + i;
|
||||
struct cpuinfo_cluster* cluster = clusters + i;
|
||||
|
||||
cluster->package = package;
|
||||
cluster->vendor = cores[cluster->core_start].vendor;
|
||||
cluster->uarch = cores[cluster->core_start].uarch;
|
||||
cluster->cpuid = cores[cluster->core_start].cpuid;
|
||||
package->cluster_start = i;
|
||||
package->cluster_count = 1;
|
||||
cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, package->name);
|
||||
}
|
||||
|
||||
/* Count caches */
|
||||
uint32_t l1i_count, l1d_count, l2_count, l3_count, l4_count;
|
||||
cpuinfo_x86_count_caches(processors_count, processors, &x86_processor,
|
||||
&l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count);
|
||||
|
||||
/* Allocate cache descriptions */
|
||||
if (l1i_count != 0) {
|
||||
l1i = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1i_count * sizeof(struct cpuinfo_cache));
|
||||
if (l1i == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
|
||||
l1i_count * sizeof(struct cpuinfo_cache), l1i_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l1d_count != 0) {
|
||||
l1d = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1d_count * sizeof(struct cpuinfo_cache));
|
||||
if (l1d == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
|
||||
l1d_count * sizeof(struct cpuinfo_cache), l1d_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l2_count != 0) {
|
||||
l2 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l2_count * sizeof(struct cpuinfo_cache));
|
||||
if (l2 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
|
||||
l2_count * sizeof(struct cpuinfo_cache), l2_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l3_count != 0) {
|
||||
l3 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l3_count * sizeof(struct cpuinfo_cache));
|
||||
if (l3 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
|
||||
l3_count * sizeof(struct cpuinfo_cache), l3_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
if (l4_count != 0) {
|
||||
l4 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l4_count * sizeof(struct cpuinfo_cache));
|
||||
if (l4 == NULL) {
|
||||
cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches",
|
||||
l4_count * sizeof(struct cpuinfo_cache), l4_count);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set cache information */
|
||||
uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX, l4_index = UINT32_MAX;
|
||||
uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
|
||||
uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < processors_count; i++) {
|
||||
const uint32_t apic_id = processors[i].apic_id;
|
||||
|
||||
if (x86_processor.cache.l1i.size != 0) {
|
||||
const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
|
||||
processors[i].cache.l1i = &l1i[l1i_index];
|
||||
if (l1i_id != last_l1i_id) {
|
||||
/* new cache */
|
||||
last_l1i_id = l1i_id;
|
||||
l1i[++l1i_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l1i.size,
|
||||
.associativity = x86_processor.cache.l1i.associativity,
|
||||
.sets = x86_processor.cache.l1i.sets,
|
||||
.partitions = x86_processor.cache.l1i.partitions,
|
||||
.line_size = x86_processor.cache.l1i.line_size,
|
||||
.flags = x86_processor.cache.l1i.flags,
|
||||
.processor_start = i,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l1i[l1i_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l1i = &l1i[l1i_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l1i_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l1d.size != 0) {
|
||||
const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits);
|
||||
processors[i].cache.l1d = &l1d[l1d_index];
|
||||
if (l1d_id != last_l1d_id) {
|
||||
/* new cache */
|
||||
last_l1d_id = l1d_id;
|
||||
l1d[++l1d_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l1d.size,
|
||||
.associativity = x86_processor.cache.l1d.associativity,
|
||||
.sets = x86_processor.cache.l1d.sets,
|
||||
.partitions = x86_processor.cache.l1d.partitions,
|
||||
.line_size = x86_processor.cache.l1d.line_size,
|
||||
.flags = x86_processor.cache.l1d.flags,
|
||||
.processor_start = i,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l1d[l1d_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l1d = &l1d[l1d_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l1d_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l2.size != 0) {
|
||||
const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits);
|
||||
processors[i].cache.l2 = &l2[l2_index];
|
||||
if (l2_id != last_l2_id) {
|
||||
/* new cache */
|
||||
last_l2_id = l2_id;
|
||||
l2[++l2_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l2.size,
|
||||
.associativity = x86_processor.cache.l2.associativity,
|
||||
.sets = x86_processor.cache.l2.sets,
|
||||
.partitions = x86_processor.cache.l2.partitions,
|
||||
.line_size = x86_processor.cache.l2.line_size,
|
||||
.flags = x86_processor.cache.l2.flags,
|
||||
.processor_start = i,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l2[l2_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l2 = &l2[l2_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l2_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l3.size != 0) {
|
||||
const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits);
|
||||
processors[i].cache.l3 = &l3[l3_index];
|
||||
if (l3_id != last_l3_id) {
|
||||
/* new cache */
|
||||
last_l3_id = l3_id;
|
||||
l3[++l3_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l3.size,
|
||||
.associativity = x86_processor.cache.l3.associativity,
|
||||
.sets = x86_processor.cache.l3.sets,
|
||||
.partitions = x86_processor.cache.l3.partitions,
|
||||
.line_size = x86_processor.cache.l3.line_size,
|
||||
.flags = x86_processor.cache.l3.flags,
|
||||
.processor_start = i,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l3[l3_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l3 = &l3[l3_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l3_id = UINT32_MAX;
|
||||
}
|
||||
if (x86_processor.cache.l4.size != 0) {
|
||||
const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits);
|
||||
processors[i].cache.l4 = &l4[l4_index];
|
||||
if (l4_id != last_l4_id) {
|
||||
/* new cache */
|
||||
last_l4_id = l4_id;
|
||||
l4[++l4_index] = (struct cpuinfo_cache) {
|
||||
.size = x86_processor.cache.l4.size,
|
||||
.associativity = x86_processor.cache.l4.associativity,
|
||||
.sets = x86_processor.cache.l4.sets,
|
||||
.partitions = x86_processor.cache.l4.partitions,
|
||||
.line_size = x86_processor.cache.l4.line_size,
|
||||
.flags = x86_processor.cache.l4.flags,
|
||||
.processor_start = i,
|
||||
.processor_count = 1,
|
||||
};
|
||||
} else {
|
||||
/* another processor sharing the same cache */
|
||||
l4[l4_index].processor_count += 1;
|
||||
}
|
||||
processors[i].cache.l4 = &l4[l4_index];
|
||||
} else {
|
||||
/* reset cache id */
|
||||
last_l4_id = UINT32_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Commit changes */
|
||||
cpuinfo_processors = processors;
|
||||
cpuinfo_cores = cores;
|
||||
cpuinfo_clusters = clusters;
|
||||
cpuinfo_packages = packages;
|
||||
cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
|
||||
cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
|
||||
cpuinfo_cache[cpuinfo_cache_level_2] = l2;
|
||||
cpuinfo_cache[cpuinfo_cache_level_3] = l3;
|
||||
cpuinfo_cache[cpuinfo_cache_level_4] = l4;
|
||||
|
||||
cpuinfo_processors_count = processors_count;
|
||||
cpuinfo_cores_count = cores_count;
|
||||
cpuinfo_clusters_count = packages_count;
|
||||
cpuinfo_packages_count = packages_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
|
||||
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
|
||||
cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
|
||||
|
||||
cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
|
||||
.uarch = x86_processor.uarch,
|
||||
.cpuid = x86_processor.cpuid,
|
||||
.processor_count = processors_count,
|
||||
.core_count = cores_count,
|
||||
};
|
||||
|
||||
MemoryBarrier();
|
||||
|
||||
cpuinfo_is_initialized = true;
|
||||
|
||||
processors = NULL;
|
||||
cores = NULL;
|
||||
clusters = NULL;
|
||||
packages = NULL;
|
||||
l1i = l1d = l2 = l3 = l4 = NULL;
|
||||
|
||||
cleanup:
|
||||
if (processors != NULL) {
|
||||
HeapFree(heap, 0, processors);
|
||||
}
|
||||
if (cores != NULL) {
|
||||
HeapFree(heap, 0, cores);
|
||||
}
|
||||
if (clusters != NULL) {
|
||||
HeapFree(heap, 0, clusters);
|
||||
}
|
||||
if (packages != NULL) {
|
||||
HeapFree(heap, 0, packages);
|
||||
}
|
||||
if (l1i != NULL) {
|
||||
HeapFree(heap, 0, l1i);
|
||||
}
|
||||
if (l1d != NULL) {
|
||||
HeapFree(heap, 0, l1d);
|
||||
}
|
||||
if (l2 != NULL) {
|
||||
HeapFree(heap, 0, l2);
|
||||
}
|
||||
if (l3 != NULL) {
|
||||
HeapFree(heap, 0, l3);
|
||||
}
|
||||
if (l4 != NULL) {
|
||||
HeapFree(heap, 0, l4);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
Reference in New Issue
Block a user