blob: ca3caa9c3b1501290073a21cf788cd8bd8a87e8b [file] [log] [blame]
diff --git CMakeLists.txt CMakeLists.txt
index e594def..cab4d05 100644
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -119,7 +119,8 @@ ENDIF()
# ---[ cpuinfo library
SET(CPUINFO_SRCS
src/init.c
- src/api.c)
+ src/api.c
+ src/cache.c)
IF(CPUINFO_SUPPORTED_PLATFORM)
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
diff --git LICENSE LICENSE
index 4910bfe..3f9a4f0 100644
--- LICENSE
+++ LICENSE
@@ -1,3 +1,4 @@
+Copyright (c) 2019 Google LLC
Copyright (c) 2017-2018 Facebook Inc.
Copyright (C) 2012-2017 Georgia Institute of Technology
Copyright (C) 2010-2012 Marat Dukhan
diff --git include/cpuinfo.h include/cpuinfo.h
index 7d5833f..9938d2b 100644
--- include/cpuinfo.h
+++ include/cpuinfo.h
@@ -38,10 +38,18 @@
#define CPUINFO_ARCH_PNACL 1
#endif
-#if defined(EMSCRIPTEN)
+#if defined(__asmjs__)
#define CPUINFO_ARCH_ASMJS 1
#endif
+#if defined(__wasm__)
+ #if defined(__wasm_simd128__)
+ #define CPUINFO_ARCH_WASMSIMD 1
+ #else
+ #define CPUINFO_ARCH_WASM 1
+ #endif
+#endif
+
#if CPUINFO_ARCH_X86 && defined(_MSC_VER)
#define CPUINFO_ABI __cdecl
#elif CPUINFO_ARCH_X86 && defined(__GNUC__)
@@ -80,6 +88,14 @@
#define CPUINFO_ARCH_ASMJS 0
#endif
+#ifndef CPUINFO_ARCH_WASM
+ #define CPUINFO_ARCH_WASM 0
+#endif
+
+#ifndef CPUINFO_ARCH_WASMSIMD
+ #define CPUINFO_ARCH_WASMSIMD 0
+#endif
+
#define CPUINFO_CACHE_UNIFIED 0x00000001
#define CPUINFO_CACHE_INCLUSIVE 0x00000002
#define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004
@@ -278,10 +294,14 @@ enum cpuinfo_uarch {
cpuinfo_uarch_haswell = 0x00100208,
/** Intel Broadwell microarchitecture. */
cpuinfo_uarch_broadwell = 0x00100209,
- /** Intel Sky Lake microarchitecture. */
+ /** Intel Sky Lake microarchitecture (14 nm, including Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */
cpuinfo_uarch_sky_lake = 0x0010020A,
- /** Intel Kaby Lake microarchitecture. */
- cpuinfo_uarch_kaby_lake = 0x0010020B,
+ /** DEPRECATED (Intel Kaby Lake microarchitecture). */
+ cpuinfo_uarch_kaby_lake = 0x0010020A,
+ /** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */
+ cpuinfo_uarch_palm_cove = 0x0010020B,
+ /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */
+ cpuinfo_uarch_sunny_cove = 0x0010020C,
/** Pentium 4 with Willamette, Northwood, or Foster cores. */
cpuinfo_uarch_willamette = 0x00100300,
@@ -289,13 +309,17 @@ enum cpuinfo_uarch {
cpuinfo_uarch_prescott = 0x00100301,
/** Intel Atom on 45 nm process. */
- cpuinfo_uarch_bonnell = 0x00100400,
+ cpuinfo_uarch_bonnell = 0x00100400,
/** Intel Atom on 32 nm process. */
- cpuinfo_uarch_saltwell = 0x00100401,
+ cpuinfo_uarch_saltwell = 0x00100401,
/** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */
- cpuinfo_uarch_silvermont = 0x00100402,
+ cpuinfo_uarch_silvermont = 0x00100402,
/** Intel Airmont microarchitecture (14 nm out-of-order Atom). */
- cpuinfo_uarch_airmont = 0x00100403,
+ cpuinfo_uarch_airmont = 0x00100403,
+ /** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */
+ cpuinfo_uarch_goldmont = 0x00100404,
+ /** Intel Goldmont Plus microarchitecture (Gemini Lake). */
+ cpuinfo_uarch_goldmont_plus = 0x00100405,
/** Intel Knights Ferry HPC boards. */
cpuinfo_uarch_knights_ferry = 0x00100500,
@@ -335,8 +359,10 @@ enum cpuinfo_uarch {
cpuinfo_uarch_steamroller = 0x00200107,
/** AMD Excavator microarchitecture (Carizzo APUs). */
cpuinfo_uarch_excavator = 0x00200108,
- /** AMD Zen microarchitecture (Ryzen CPUs). */
+ /** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */
cpuinfo_uarch_zen = 0x00200109,
+ /** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */
+ cpuinfo_uarch_zen2 = 0x0020010A,
/** NSC Geode and AMD Geode GX and LX. */
cpuinfo_uarch_geode = 0x00200200,
@@ -370,23 +396,34 @@ enum cpuinfo_uarch {
cpuinfo_uarch_cortex_a17 = 0x00300217,
/** ARM Cortex-A32. */
- cpuinfo_uarch_cortex_a32 = 0x00300332,
+ cpuinfo_uarch_cortex_a32 = 0x00300332,
/** ARM Cortex-A35. */
- cpuinfo_uarch_cortex_a35 = 0x00300335,
+ cpuinfo_uarch_cortex_a35 = 0x00300335,
/** ARM Cortex-A53. */
- cpuinfo_uarch_cortex_a53 = 0x00300353,
+ cpuinfo_uarch_cortex_a53 = 0x00300353,
/** ARM Cortex-A55. */
- cpuinfo_uarch_cortex_a55 = 0x00300355,
+ cpuinfo_uarch_cortex_a55 = 0x00300355,
/** ARM Cortex-A57. */
- cpuinfo_uarch_cortex_a57 = 0x00300357,
+ cpuinfo_uarch_cortex_a57 = 0x00300357,
+ /** ARM Cortex-A65. */
+ cpuinfo_uarch_cortex_a65 = 0x00300365,
/** ARM Cortex-A72. */
- cpuinfo_uarch_cortex_a72 = 0x00300372,
+ cpuinfo_uarch_cortex_a72 = 0x00300372,
/** ARM Cortex-A73. */
- cpuinfo_uarch_cortex_a73 = 0x00300373,
+ cpuinfo_uarch_cortex_a73 = 0x00300373,
/** ARM Cortex-A75. */
- cpuinfo_uarch_cortex_a75 = 0x00300375,
+ cpuinfo_uarch_cortex_a75 = 0x00300375,
/** ARM Cortex-A76. */
- cpuinfo_uarch_cortex_a76 = 0x00300376,
+ cpuinfo_uarch_cortex_a76 = 0x00300376,
+ /** ARM Cortex-A76AE. */
+ cpuinfo_uarch_cortex_a76ae = 0x00300378,
+ /** ARM Cortex-A77. */
+ cpuinfo_uarch_cortex_a77 = 0x00300377,
+
+ /** ARM Neoverse N1. */
+ cpuinfo_uarch_neoverse_n1 = 0x00300400,
+ /** ARM Neoverse E1. */
+ cpuinfo_uarch_neoverse_e1 = 0x00300401,
/** Qualcomm Scorpion. */
cpuinfo_uarch_scorpion = 0x00400100,
@@ -406,12 +443,22 @@ enum cpuinfo_uarch {
/** Nvidia Carmel. */
cpuinfo_uarch_carmel = 0x00500102,
- /** Samsung Mongoose M1 (Exynos 8890 big cores). */
+ /** Samsung Exynos M1 (Exynos 8890 big cores). */
+ cpuinfo_uarch_exynos_m1 = 0x00600100,
+ /** Samsung Exynos M2 (Exynos 8895 big cores). */
+ cpuinfo_uarch_exynos_m2 = 0x00600101,
+ /** Samsung Exynos M3 (Exynos 9810 big cores). */
+ cpuinfo_uarch_exynos_m3 = 0x00600102,
+ /** Samsung Exynos M4 (Exynos 9820 big cores). */
+ cpuinfo_uarch_exynos_m4 = 0x00600103,
+ /** Samsung Exynos M5 (Exynos 9830 big cores). */
+ cpuinfo_uarch_exynos_m5 = 0x00600104,
+
+ /* Old names for Exynos. */
cpuinfo_uarch_mongoose_m1 = 0x00600100,
- /** Samsung Mongoose M2 (Exynos 8895 big cores). */
cpuinfo_uarch_mongoose_m2 = 0x00600101,
- /** Samsung Meerkat M3 (Exynos 9810 big cores). */
cpuinfo_uarch_meerkat_m3 = 0x00600102,
+ cpuinfo_uarch_meerkat_m4 = 0x00600103,
/** Apple A6 and A6X processors. */
cpuinfo_uarch_swift = 0x00700100,
@@ -640,6 +687,8 @@ void CPUINFO_ABI cpuinfo_deinitialize(void);
bool avx512bitalg;
bool avx512vpopcntdq;
bool avx512vnni;
+ bool avx512bf16;
+ bool avx512vp2intersect;
bool avx512_4vnniw;
bool avx512_4fmaps;
bool hle;
@@ -1110,6 +1159,22 @@ static inline bool cpuinfo_has_x86_avx512vnni(void) {
#endif
}
+static inline bool cpuinfo_has_x86_avx512bf16(void) { /* Reports the avx512bf16 flag stored in cpuinfo_isa. */
+ #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ return cpuinfo_isa.avx512bf16;
+ #else /* Built for neither x86 nor x86-64: the feature is always reported as absent. */
+ return false;
+ #endif
+}
+
+static inline bool cpuinfo_has_x86_avx512vp2intersect(void) { /* Reports the avx512vp2intersect flag stored in cpuinfo_isa. */
+ #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ return cpuinfo_isa.avx512vp2intersect;
+ #else /* Built for neither x86 nor x86-64: the feature is always reported as absent. */
+ return false;
+ #endif
+}
+
static inline bool cpuinfo_has_x86_avx512_4vnniw(void) {
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
return cpuinfo_isa.avx512_4vnniw;
@@ -1682,6 +1747,11 @@ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void);
uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void);
uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void);
+/**
+ * Returns upper bound on cache size.
+ */
+uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void);
+
const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void);
const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
diff --git src/api.c src/api.c
index 98b5805..83744f5 100644
--- src/api.c
+++ src/api.c
@@ -18,6 +18,7 @@ uint32_t cpuinfo_cores_count = 0;
uint32_t cpuinfo_clusters_count = 0;
uint32_t cpuinfo_packages_count = 0;
uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
+uint32_t cpuinfo_max_cache_size = 0;
const struct cpuinfo_processor* cpuinfo_get_processors(void) {
diff --git src/arm/api.h src/arm/api.h
index 11e588b..69274bc 100644
--- src/arm/api.h
+++ src/arm/api.h
@@ -104,6 +104,9 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
struct cpuinfo_cache l1d[restrict static 1],
struct cpuinfo_cache l2[restrict static 1],
struct cpuinfo_cache l3[restrict static 1]);
+
+CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
+ const struct cpuinfo_processor processor[restrict static 1]);
#else /* defined(__cplusplus) */
CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
diff --git src/arm/cache.c src/arm/cache.c
index 5ada7d9..ccadeb4 100644
--- src/arm/cache.c
+++ src/arm/cache.c
@@ -1,10 +1,12 @@
#include <stdint.h>
#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>
#include <arm/api.h>
#include <arm/midr.h>
+
void cpuinfo_arm_decode_cache(
enum cpuinfo_uarch uarch,
uint32_t cluster_cores,
@@ -109,7 +111,7 @@ void cpuinfo_arm_decode_cache(
* memory accesses and has been optimized for use with the Cortex-A5 processor.
* 8.1.7. Exclusive L2 cache
* The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
- * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
+ * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
*
* +--------------------+-----------+-----------+----------+-----------+
* | Processor model | L1D cache | L1I cache | L2 cache | Reference |
@@ -698,7 +700,7 @@ void cpuinfo_arm_decode_cache(
* [3] https://en.wikichip.org/wiki/hisilicon/kirin/980
*/
if (midr_is_qualcomm_cortex_a55_silver(midr)) {
- /* Qualcomm-modified Cortex-A55 in Snapdragon 710 / 845 */
+ /* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */
uint32_t l3_size = 1024 * 1024;
switch (chipset->series) {
case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
@@ -827,6 +829,62 @@ void cpuinfo_arm_decode_cache(
.flags = CPUINFO_CACHE_INCLUSIVE
};
break;
+ case cpuinfo_uarch_cortex_a65:
+ {
+ /*
+ * ARM Cortex‑A65 Core Technical Reference Manual
+ * A6.1. About the L1 memory system
+ * The L1 memory system enhances the performance and power efficiency in the Cortex‑A65 core.
+ * It consists of separate instruction and data caches. You can configure instruction and data caches
+ * independently during implementation to sizes of 32KB or 64KB.
+ *
+ * L1 instruction-side memory system
+ * The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are:
+ * - 64-byte instruction side cache line length.
+ * - 4-way set associative L1 instruction cache.
+ *
+ * L1 data-side memory system
+ * - 64-byte data side cache line length.
+ * - 4-way set associative L1 data cache.
+ *
+ * A7.1 About the L2 memory system
+ * The Cortex‑A65 L2 memory system is required to interface the Cortex‑A65 cores to the L3 memory system.
+ * The L2 memory subsystem consists of:
+ * - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB.
+ * Cache lines have a fixed length of 64 bytes.
+ *
+ * The main features of the L2 memory system are:
+ * - Strictly exclusive with L1 data cache.
+ * - Pseudo-inclusive with L1 instruction cache.
+ * - Private per-core unified L2 cache.
+ */
+ const uint32_t l1_size = 32 * 1024;
+ const uint32_t l2_size = 128 * 1024;
+ const uint32_t l3_size = 512 * 1024;
+ *l1i = (struct cpuinfo_cache) {
+ .size = l1_size,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l1d = (struct cpuinfo_cache) {
+ .size = l1_size,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l2 = (struct cpuinfo_cache) {
+ .size = l2_size,
+ .associativity = 4,
+ .line_size = 64,
+ .flags = CPUINFO_CACHE_INCLUSIVE
+ };
+ *l3 = (struct cpuinfo_cache) {
+ .size = l3_size,
+ /* DynamIQ */
+ .associativity = 16,
+ .line_size = 64,
+ };
+ break;
+ }
case cpuinfo_uarch_cortex_a72:
{
/*
@@ -1047,6 +1105,7 @@ void cpuinfo_arm_decode_cache(
break;
}
case cpuinfo_uarch_cortex_a76:
+ case cpuinfo_uarch_cortex_a76ae:
{
/*
* ARM Cortex-A76 Core Technical Reference Manual
@@ -1119,6 +1178,57 @@ void cpuinfo_arm_decode_cache(
};
break;
}
+ case cpuinfo_uarch_cortex_a77:
+ {
+ /*
+ * ARM Cortex-A77 Core Technical Reference Manual
+ * A6.1. About the L1 memory system
+ * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
+ *
+ * A6.1.1 L1 instruction-side memory system
+ * The L1 instruction memory system has the following key features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ *
+ * A6.1.2 L1 data-side memory system
+ * The L1 data memory system has the following features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ * - Pseudo-LRU cache replacement policy.
+ *
+ * A7.1 About the L2 memory system
+ * The L2 memory subsystem consist of:
+ * - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines
+ * have a fixed length of 64 bytes.
+ * - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache.
+ */
+ const uint32_t l2_size = 256 * 1024;
+ const uint32_t l3_size = 1024 * 1024;
+ *l1i = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l1d = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l2 = (struct cpuinfo_cache) {
+ .size = l2_size,
+ .associativity = 8,
+ .line_size = 64,
+ .flags = CPUINFO_CACHE_INCLUSIVE,
+ };
+ *l3 = (struct cpuinfo_cache) {
+ .size = l3_size,
+ .associativity = 16,
+ .line_size = 64,
+ };
+ break;
+ }
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
case cpuinfo_uarch_scorpion:
/*
@@ -1248,8 +1358,8 @@ void cpuinfo_arm_decode_cache(
.line_size = 64
};
break;
- case cpuinfo_uarch_mongoose_m1:
- case cpuinfo_uarch_mongoose_m2:
+ case cpuinfo_uarch_exynos_m1:
+ case cpuinfo_uarch_exynos_m2:
/*
* - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
* namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
@@ -1283,7 +1393,7 @@ void cpuinfo_arm_decode_cache(
.line_size = 64
};
break;
- case cpuinfo_uarch_meerkat_m3:
+ case cpuinfo_uarch_exynos_m3:
/*
* +--------------------+-------+-----------+-----------+-----------+----------+------------+
* | Processor model | Cores | L1D cache | L1I cache | L2 cache | L3 cache | Reference |
@@ -1294,19 +1404,19 @@ void cpuinfo_arm_decode_cache(
* [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results
*/
*l1i = (struct cpuinfo_cache) {
- .size = 64 * 1024 /* assume same as in Mongoose cores */,
- .associativity = 4 /* assume same as in Mongoose cores */,
- .line_size = 128 /* assume same as in Mongoose cores */
+ .size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */,
+ .associativity = 4 /* assume same as in Exynos M1/M2 cores */,
+ .line_size = 128 /* assume same as in Exynos M1/M2 cores */
};
*l1d = (struct cpuinfo_cache) {
.size = 64 * 1024,
- .associativity = 8 /* assume same as in Mongoose cores */,
- .line_size = 64 /* assume same as in Mongoose cores */,
+ .associativity = 8 /* assume same as in Exynos M1/M2 cores */,
+ .line_size = 64 /* assume same as in Exynos M1/M2 cores */,
};
*l2 = (struct cpuinfo_cache) {
.size = 512 * 1024,
- .associativity = 16 /* assume same as in Mongoose cores */,
- .line_size = 64 /* assume same as in Mongoose cores */,
+ .associativity = 16 /* assume same as in Exynos M1/M2 cores */,
+ .line_size = 64 /* assume same as in Exynos M1/M2 cores */,
};
*l3 = (struct cpuinfo_cache) {
.size = 4 * 1024 * 1024,
@@ -1393,3 +1503,124 @@ void cpuinfo_arm_decode_cache(
}
}
}
+
+uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) { /* Returns an upper bound on cache size, in bytes, chosen by the microarchitecture of processor's core. */
+ /*
+ * There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo
+ * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum.
+ */
+ switch (processor->core->uarch) {
+ case cpuinfo_uarch_xscale:
+ case cpuinfo_uarch_arm11:
+ case cpuinfo_uarch_scorpion:
+ case cpuinfo_uarch_krait:
+ case cpuinfo_uarch_kryo:
+ case cpuinfo_uarch_exynos_m1:
+ case cpuinfo_uarch_exynos_m2:
+ case cpuinfo_uarch_exynos_m3:
+ /* cpuinfo-detected cache size always correct */
+ return cpuinfo_compute_max_cache_size(processor);
+ case cpuinfo_uarch_cortex_a5:
+ /* Max observed (NXP Vybrid SoC) */
+ return 512 * 1024;
+ case cpuinfo_uarch_cortex_a7:
+ /*
+ * Cortex-A7 MPCore Technical Reference Manual:
+ * 7.1. About the L2 Memory system
+ * The L2 memory system consists of an:
+ * - Optional tightly-coupled L2 cache that includes:
+ * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
+ */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a8:
+ /*
+ * Cortex-A8 Technical Reference Manual:
+ * 8.1. About the L2 memory system
+ * The key features of the L2 memory system include:
+ * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
+ */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a9:
+ /* Max observed (e.g. Exynos 4212) */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a12:
+ case cpuinfo_uarch_cortex_a17:
+ /*
+ * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
+ * 7.1. About the L2 Memory system
+ * The key features of the L2 memory system include:
+ * - An integrated L2 cache:
+ * - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
+ */
+ return 8 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a15:
+ /*
+ * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
+ * 7.1. About the L2 memory system
+ * The features of the L2 memory system include:
+ * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
+ */
+ return 4 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a35:
+ /*
+ * ARM Cortex‑A35 Processor Technical Reference Manual:
+ * 7.1 About the L2 memory system
+ * L2 cache
+ * - Further features of the L2 cache are:
+ * - Configurable size of 128KB, 256KB, 512KB, and 1MB.
+ */
+ return 1024 * 1024;
+ case cpuinfo_uarch_cortex_a53:
+ /*
+ * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
+ * 7.1. About the L2 memory system
+ * The L2 memory system consists of an:
+ * - Optional tightly-coupled L2 cache that includes:
+ * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
+ */
+ return 2 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a57:
+ /*
+ * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
+ * 7.1 About the L2 memory system
+ * The features of the L2 memory system include:
+ * - Configurable L2 cache size of 512KB, 1MB, and 2MB.
+ */
+ return 2 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a72:
+ /*
+ * ARM Cortex-A72 MPCore Processor Technical Reference Manual:
+ * 7.1 About the L2 memory system
+ * The features of the L2 memory system include:
+ * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
+ */
+ return 4 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a73:
+ /*
+ * ARM Cortex‑A73 MPCore Processor Technical Reference Manual
+ * 7.1 About the L2 memory system
+ * The L2 memory system consists of:
+ * - A tightly-integrated L2 cache with:
+ * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
+ */
+ return 8 * 1024 * 1024;
+ case cpuinfo_uarch_cortex_a55:
+ case cpuinfo_uarch_cortex_a75:
+ case cpuinfo_uarch_cortex_a76:
+ case cpuinfo_uarch_exynos_m4:
+ default: /* Every microarchitecture without its own case above also receives the 4 MB bound below. */
+ /*
+ * ARM DynamIQ Shared Unit Technical Reference Manual
+ * 1.3 Implementation options
+ * L3_CACHE_SIZE
+ * - 256KB
+ * - 512KB
+ * - 1024KB
+ * - 1536KB
+ * - 2048KB
+ * - 3072KB
+ * - 4096KB
+ */
+ return 4 * 1024 * 1024;
+ }
+}
diff --git src/arm/linux/init.c src/arm/linux/init.c
index a297f63..f0c432c 100644
--- src/arm/linux/init.c
+++ src/arm/linux/init.c
@@ -678,6 +678,8 @@ void cpuinfo_arm_linux_init(void) {
cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+ cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/arm/linux/midr.c src/arm/linux/midr.c
index 668fc72..2c3116b 100644
--- src/arm/linux/midr.c
+++ src/arm/linux/midr.c
@@ -220,7 +220,7 @@ static const struct cluster_config cluster_configs[] = {
.model = UINT16_C(7420),
.clusters = 2,
.cluster_cores = {
- [0] = 4,
+ [0] = 4,
[1] = 4,
},
.cluster_midr = {
@@ -229,7 +229,7 @@ static const struct cluster_config cluster_configs[] = {
},
},
{
- /* Exynos 8890: 4x Mongoose + 4x Cortex-A53 */
+ /* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */
.cores = 8,
.series = cpuinfo_arm_chipset_series_samsung_exynos,
.model = UINT16_C(8890),
@@ -695,7 +695,7 @@ static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
if (processors[i].package_leader_id == i) {
if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
- midr = processors[i].midr;
+ midr = processors[i].midr;
} else {
cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, i, midr);
/* To be consistent, we copy the MIDR entirely, rather than by parts */
@@ -836,7 +836,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr(
* - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value.
* - Clusters following any reported MIDR value to have that MIDR value.
*/
-
+
if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
chipset, clusters_count, cluster_leaders, usable_processors, processors, true))
{
diff --git src/arm/mach/init.c src/arm/mach/init.c
index 5b14b49..e64cc18 100644
--- src/arm/mach/init.c
+++ src/arm/mach/init.c
@@ -562,6 +562,8 @@ void cpuinfo_arm_mach_init(void) {
cpuinfo_clusters_count = num_clusters;
cpuinfo_packages_count = mach_topology.packages;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/arm/midr.h src/arm/midr.h
index 6363ed7..d5a28e3 100644
--- src/arm/midr.h
+++ src/arm/midr.h
@@ -33,31 +33,31 @@
#define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
#define CPUINFO_ARM_MIDR_KRYO_GOLD UINT32_C(0x510F2050)
#define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
-#define CPUINFO_ARM_MIDR_MONGOOSE UINT32_C(0x530F0010)
+#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010)
#define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030)
inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
- return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
}
inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
- return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK);
}
inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
- return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK);
}
inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
- return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
}
inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
- return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
+ return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK);
}
@@ -171,13 +171,20 @@ inline static bool midr_is_kryo_gold(uint32_t midr) {
inline static uint32_t midr_score_core(uint32_t midr) {
const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
switch (midr & core_mask) {
+ case UINT32_C(0x53000040): /* Exynos M5 */
+ case UINT32_C(0x53000030): /* Exynos M4 */
+ /* These cores are in big role w.r.t Cortex-A75 or Cortex-A76 */
+ return 6;
case UINT32_C(0x4E000030): /* Denver 2 */
- case UINT32_C(0x53000010): /* Mongoose */
- case UINT32_C(0x53000020): /* Meerkat */
+ case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
+ case UINT32_C(0x53000020): /* Exynos M3 */
+ case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
case UINT32_C(0x51008020): /* Kryo 385 Gold */
case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
case UINT32_C(0x51002050): /* Kryo Gold */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+ case UINT32_C(0x4100D0D0): /* Cortex-A77 */
+ case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0A0): /* Cortex-A75 */
case UINT32_C(0x4100D090): /* Cortex-A73 */
@@ -191,12 +198,14 @@ inline static uint32_t midr_score_core(uint32_t midr) {
case UINT32_C(0x4100D070): /* Cortex-A57 */
/* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */
return 4;
+ case UINT32_C(0x4100D060): /* Cortex-A65 */
case UINT32_C(0x4100D050): /* Cortex-A55 */
case UINT32_C(0x4100D030): /* Cortex-A53 */
/* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */
return 2;
case UINT32_C(0x4100D040): /* Cortex-A35 */
case UINT32_C(0x4100C070): /* Cortex-A7 */
+ case UINT32_C(0x51008050): /* Kryo 485 Silver */
case UINT32_C(0x51008030): /* Kryo 385 Silver */
case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
@@ -215,7 +224,7 @@ inline static uint32_t midr_score_core(uint32_t midr) {
}
inline static uint32_t midr_little_core_for_big(uint32_t midr) {
- const uint32_t core_mask =
+ const uint32_t core_mask =
CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
switch (midr & core_mask) {
case CPUINFO_ARM_MIDR_CORTEX_A75:
@@ -223,7 +232,7 @@ inline static uint32_t midr_little_core_for_big(uint32_t midr) {
case CPUINFO_ARM_MIDR_CORTEX_A73:
case CPUINFO_ARM_MIDR_CORTEX_A72:
case CPUINFO_ARM_MIDR_CORTEX_A57:
- case CPUINFO_ARM_MIDR_MONGOOSE:
+ case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
return CPUINFO_ARM_MIDR_CORTEX_A53;
case CPUINFO_ARM_MIDR_CORTEX_A17:
case CPUINFO_ARM_MIDR_CORTEX_A15:
diff --git src/arm/uarch.c src/arm/uarch.c
index d7d2c63..a38250a 100644
--- src/arm/uarch.c
+++ src/arm/uarch.c
@@ -60,6 +60,9 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD05:
*uarch = cpuinfo_uarch_cortex_a55;
break;
+ case 0xD06:
+ *uarch = cpuinfo_uarch_cortex_a65;
+ break;
case 0xD07:
*uarch = cpuinfo_uarch_cortex_a57;
break;
@@ -75,6 +78,22 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD0B:
*uarch = cpuinfo_uarch_cortex_a76;
break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD0C:
+ *uarch = cpuinfo_uarch_neoverse_n1;
+ break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+ case 0xD0D:
+ *uarch = cpuinfo_uarch_cortex_a77;
+ break;
+ case 0xD0E:
+ *uarch = cpuinfo_uarch_cortex_a76ae;
+ break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD4A:
+ *uarch = cpuinfo_uarch_neoverse_e1;
+ break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
default:
switch (midr_get_part(midr) >> 8) {
#if CPUINFO_ARCH_ARM
@@ -242,10 +261,14 @@ void cpuinfo_arm_decode_vendor_uarch(
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a55;
break;
- case 0x804:
+ case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
*vendor = cpuinfo_vendor_arm;
*uarch = cpuinfo_uarch_cortex_a76;
break;
+ case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */
+ *vendor = cpuinfo_vendor_arm;
+ *uarch = cpuinfo_uarch_cortex_a55;
+ break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
case 0xC00:
*uarch = cpuinfo_uarch_falkor;
@@ -263,27 +286,43 @@ void cpuinfo_arm_decode_vendor_uarch(
switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case 0x00100010:
/*
- * Exynos 8890 MIDR = 0x531F0011, assume Mongoose M1 has:
+ * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has:
* - CPU variant 0x1
* - CPU part 0x001
*/
- *uarch = cpuinfo_uarch_mongoose_m1;
+ *uarch = cpuinfo_uarch_exynos_m1;
break;
case 0x00400010:
/*
- * Exynos 8895 MIDR = 0x534F0010, assume Mongoose M2 has:
+ * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has:
* - CPU variant 0x4
* - CPU part 0x001
*/
- *uarch = cpuinfo_uarch_mongoose_m2;
+ *uarch = cpuinfo_uarch_exynos_m2;
break;
case 0x00100020:
/*
- * Exynos 9810 MIDR = 0x531F0020, assume Meerkat M3 has:
+ * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has:
* - CPU variant 0x1
* - CPU part 0x002
*/
- *uarch = cpuinfo_uarch_meerkat_m3;
+ *uarch = cpuinfo_uarch_exynos_m3;
+ break;
+ case 0x00100030:
+ /*
+ * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has:
+ * - CPU variant 0x1
+ * - CPU part 0x003
+ */
+ *uarch = cpuinfo_uarch_exynos_m4;
+ break;
+ case 0x00100040:
+ /*
+ * Exynos 9830 MIDR = 0x531F0040, assume Exynos M5 has:
+ * - CPU variant 0x1
+ * - CPU part 0x004
+ */
+ *uarch = cpuinfo_uarch_exynos_m5;
break;
default:
cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored",
diff --git src/cache.c src/cache.c
new file mode 100644
index 0000000..b976b87
--- /dev/null
+++ src/cache.c
@@ -0,0 +1,18 @@
+#include <stddef.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+
+
+uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) { /* Returns the size of the highest-numbered cache level attached to processor. */
+ if (processor->cache.l4 != NULL) { /* Levels are probed in the order L4, L3, L2, L1D; the first non-NULL one decides the result. */
+ return processor->cache.l4->size;
+ } else if (processor->cache.l3 != NULL) {
+ return processor->cache.l3->size;
+ } else if (processor->cache.l2 != NULL) {
+ return processor->cache.l2->size;
+ } else if (processor->cache.l1d != NULL) {
+ return processor->cache.l1d->size;
+ }
+ return 0; /* None of the l4/l3/l2/l1d pointers is set, so no size can be reported. */
+}
diff --git src/cpuinfo/internal-api.h src/cpuinfo/internal-api.h
index 6045750..717b810 100644
--- src/cpuinfo/internal-api.h
+++ src/cpuinfo/internal-api.h
@@ -31,6 +31,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count;
extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max];
+extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
@@ -40,4 +41,6 @@ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
+CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor);
+
typedef void (*cpuinfo_processor_callback)(uint32_t);
diff --git src/x86/isa.c src/x86/isa.c
index bca1ecd..d27dbca 100644
--- src/x86/isa.c
+++ src/x86/isa.c
@@ -42,8 +42,10 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
{
struct cpuinfo_x86_isa isa = { 0 };
- const struct cpuid_regs structured_feature_info =
+ const struct cpuid_regs structured_feature_info0 =
(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0};
+ const struct cpuid_regs structured_feature_info1 =
+ (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0};
const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
const struct cpuid_regs processor_capacity_info =
@@ -144,9 +146,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* CLFLUSHOPT instruction:
- * - Intel: ebx[bit 23] in structured feature info.
+ * - Intel: ebx[bit 23] in structured feature info (ecx = 0).
*/
- isa.clflushopt = !!(structured_feature_info.ebx & UINT32_C(0x00800000));
+ isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000));
/*
* MWAIT/MONITOR instructions:
@@ -273,9 +275,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* PREFETCHWT1 instruction:
- * - Intel: ecx[bit 0] of structured feature info. Reserved bit on AMD.
+ * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD.
*/
- isa.prefetchwt1 = !!(structured_feature_info.ecx & UINT32_C(0x00000001));
+ isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001));
#if CPUINFO_ARCH_X86
/*
@@ -386,111 +388,123 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* AVX2 instructions:
- * - Intel: ebx[bit 5] in structured feature info.
+ * - Intel: ebx[bit 5] in structured feature info (ecx = 0).
*/
- isa.avx2 = avx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00000020));
+ isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020));
/*
* AVX512F instructions:
- * - Intel: ebx[bit 16] in structured feature info.
+ * - Intel: ebx[bit 16] in structured feature info (ecx = 0).
*/
- isa.avx512f = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00010000));
+ isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
/*
* AVX512PF instructions:
- * - Intel: ebx[bit 26] in structured feature info.
+ * - Intel: ebx[bit 26] in structured feature info (ecx = 0).
*/
- isa.avx512pf = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x04000000));
+ isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000));
/*
* AVX512ER instructions:
- * - Intel: ebx[bit 27] in structured feature info.
+ * - Intel: ebx[bit 27] in structured feature info (ecx = 0).
*/
- isa.avx512er = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x08000000));
+ isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000));
/*
* AVX512CD instructions:
- * - Intel: ebx[bit 28] in structured feature info.
+ * - Intel: ebx[bit 28] in structured feature info (ecx = 0).
*/
- isa.avx512cd = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x10000000));
+ isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000));
/*
* AVX512DQ instructions:
- * - Intel: ebx[bit 17] in structured feature info.
+ * - Intel: ebx[bit 17] in structured feature info (ecx = 0).
*/
- isa.avx512dq = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00020000));
+ isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000));
/*
* AVX512BW instructions:
- * - Intel: ebx[bit 30] in structured feature info.
+ * - Intel: ebx[bit 30] in structured feature info (ecx = 0).
*/
- isa.avx512bw = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x40000000));
+ isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000));
/*
* AVX512VL instructions:
- * - Intel: ebx[bit 31] in structured feature info.
+ * - Intel: ebx[bit 31] in structured feature info (ecx = 0).
*/
- isa.avx512vl = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x80000000));
+ isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000));
/*
* AVX512IFMA instructions:
- * - Intel: ebx[bit 21] in structured feature info.
+ * - Intel: ebx[bit 21] in structured feature info (ecx = 0).
*/
- isa.avx512ifma = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00200000));
+ isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000));
/*
* AVX512VBMI instructions:
- * - Intel: ecx[bit 1] in structured feature info.
+ * - Intel: ecx[bit 1] in structured feature info (ecx = 0).
*/
- isa.avx512vbmi = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000002));
+ isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002));
/*
* AVX512VBMI2 instructions:
- * - Intel: ecx[bit 6] in structured feature info.
+ * - Intel: ecx[bit 6] in structured feature info (ecx = 0).
*/
- isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000040));
+ isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040));
/*
* AVX512BITALG instructions:
- * - Intel: ecx[bit 12] in structured feature info.
+ * - Intel: ecx[bit 12] in structured feature info (ecx = 0).
*/
- isa.avx512bitalg = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00001000));
+ isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000));
/*
* AVX512VPOPCNTDQ instructions:
- * - Intel: ecx[bit 14] in structured feature info.
+ * - Intel: ecx[bit 14] in structured feature info (ecx = 0).
*/
- isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00004000));
+ isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000));
/*
* AVX512VNNI instructions:
- * - Intel: ecx[bit 11] in structured feature info.
+ * - Intel: ecx[bit 11] in structured feature info (ecx = 0).
*/
- isa.avx512vnni = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000800));
+ isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800));
/*
* AVX512_4VNNIW instructions:
- * - Intel: edx[bit 2] in structured feature info.
+ * - Intel: edx[bit 2] in structured feature info (ecx = 0).
*/
- isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000004));
+ isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004));
/*
* AVX512_4FMAPS instructions:
- * - Intel: edx[bit 3] in structured feature info.
+ * - Intel: edx[bit 3] in structured feature info (ecx = 0).
*/
- isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000008));
+ isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008));
+
+ /*
+ * AVX512_VP2INTERSECT instructions:
+ * - Intel: edx[bit 8] in structured feature info (ecx = 0).
+ */
+ isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100));
+
+ /*
+ * AVX512_BF16 instructions:
+ * - Intel: eax[bit 5] in structured feature info (ecx = 1).
+ */
+ isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
/*
* HLE instructions:
- * - Intel: ebx[bit 4] in structured feature info.
+ * - Intel: ebx[bit 4] in structured feature info (ecx = 0).
*/
- isa.hle = !!(structured_feature_info.ebx & UINT32_C(0x00000010));
+ isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010));
/*
* RTM instructions:
- * - Intel: ebx[bit 11] in structured feature info.
+ * - Intel: ebx[bit 11] in structured feature info (ecx = 0).
*/
- isa.rtm = !!(structured_feature_info.ebx & UINT32_C(0x00000800));
+ isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800));
/*
* XTEST instruction:
@@ -500,9 +514,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* MPX registers and instructions:
- * - Intel: ebx[bit 14] in structured feature info.
+ * - Intel: ebx[bit 14] in structured feature info (ecx = 0).
*/
- isa.mpx = mpx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00004000));
+ isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000));
#if CPUINFO_ARCH_X86
/*
@@ -528,9 +542,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* CLWB instruction:
- * - Intel: ebx[bit 24] in structured feature info.
+ * - Intel: ebx[bit 24] in structured feature info (ecx = 0).
*/
- isa.clwb = !!(structured_feature_info.ebx & UINT32_C(0x01000000));
+ isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000));
/*
* MOVBE instruction:
@@ -549,9 +563,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions.
- * - Intel: ebx[bit 0] in structured feature info.
+ * - Intel: ebx[bit 0] in structured feature info (ecx = 0).
*/
- isa.fs_gs_base = !!(structured_feature_info.ebx & UINT32_C(0x00000001));
+ isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001));
/*
* LZCNT instruction:
@@ -573,21 +587,21 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* BMI instructions:
- * - Intel, AMD: ebx[bit 3] in structured feature info.
+ * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0).
*/
- isa.bmi = !!(structured_feature_info.ebx & UINT32_C(0x00000008));
+ isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008));
/*
* BMI2 instructions:
- * - Intel: ebx[bit 8] in structured feature info.
+ * - Intel: ebx[bit 8] in structured feature info (ecx = 0).
*/
- isa.bmi2 = !!(structured_feature_info.ebx & UINT32_C(0x00000100));
+ isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100));
/*
* ADCX/ADOX instructions:
- * - Intel: ebx[bit 19] in structured feature info.
+ * - Intel: ebx[bit 19] in structured feature info (ecx = 0).
*/
- isa.adx = !!(structured_feature_info.ebx & UINT32_C(0x00080000));
+ isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000));
/*
* AES instructions:
@@ -597,9 +611,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* VAES instructions:
- * - Intel: ecx[bit 9] in structured feature info.
+ * - Intel: ecx[bit 9] in structured feature info (ecx = 0).
*/
- isa.vaes = !!(structured_feature_info.ecx & UINT32_C(0x00000200));
+ isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200));
/*
* PCLMULQDQ instruction:
@@ -609,15 +623,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* VPCLMULQDQ instruction:
- * - Intel: ecx[bit 10] in structured feature info.
+ * - Intel: ecx[bit 10] in structured feature info (ecx = 0).
*/
- isa.vpclmulqdq = !!(structured_feature_info.ecx & UINT32_C(0x00000400));
+ isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400));
/*
* GFNI instructions:
- * - Intel: ecx[bit 8] in structured feature info.
+ * - Intel: ecx[bit 8] in structured feature info (ecx = 0).
*/
- isa.gfni = !!(structured_feature_info.ecx & UINT32_C(0x00000100));
+ isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100));
/*
* RDRAND instruction:
@@ -627,15 +641,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* RDSEED instruction:
- * - Intel: ebx[bit 18] in structured feature info.
+ * - Intel: ebx[bit 18] in structured feature info (ecx = 0).
*/
- isa.rdseed = !!(structured_feature_info.ebx & UINT32_C(0x00040000));
+ isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000));
/*
* SHA instructions:
- * - Intel: ebx[bit 29] in structured feature info.
+ * - Intel: ebx[bit 29] in structured feature info (ecx = 0).
*/
- isa.sha = !!(structured_feature_info.ebx & UINT32_C(0x20000000));
+ isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000));
if (vendor == cpuinfo_vendor_via) {
const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000));
@@ -700,9 +714,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
/*
* RDPID instruction:
- * - Intel: ecx[bit 22] in structured feature info.
+ * - Intel: ecx[bit 22] in structured feature info (ecx = 0).
*/
- isa.rdpid = !!(structured_feature_info.ecx & UINT32_C(0x00400000));
+ isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000));
return isa;
}
diff --git src/x86/linux/init.c src/x86/linux/init.c
index b5f74d0..c096336 100644
--- src/x86/linux/init.c
+++ src/x86/linux/init.c
@@ -592,6 +592,8 @@ void cpuinfo_x86_linux_init(void) {
cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/x86/mach/init.c src/x86/mach/init.c
index 7b41ad0..ae2be33 100644
--- src/x86/mach/init.c
+++ src/x86/mach/init.c
@@ -327,6 +327,8 @@ void cpuinfo_x86_mach_init(void) {
cpuinfo_clusters_count = mach_topology.packages;
cpuinfo_packages_count = mach_topology.packages;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
__sync_synchronize();
cpuinfo_is_initialized = true;
diff --git src/x86/uarch.c src/x86/uarch.c
index 71c899e..ba72d8a 100644
--- src/x86/uarch.c
+++ src/x86/uarch.c
@@ -74,13 +74,19 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
case 0x4F: // Broadwell-E
case 0x56: // Broadwell-DE
return cpuinfo_uarch_broadwell;
- case 0x4E: // Skylake-U/Y
- case 0x55: // Skylake Server (SKX)
- case 0x5E: // Skylake-H/S
+ case 0x4E: // Sky Lake Client Y/U
+ case 0x55: // Sky/Cascade/Cooper Lake Server
+ case 0x5E: // Sky Lake Client DT/H/S
+ case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
+ case 0x9E: // Kaby/Coffee Lake DT/H/S
return cpuinfo_uarch_sky_lake;
- case 0x8E: // Kaby Lake U/Y
- case 0x9E: // Kaby Lake H/S
- return cpuinfo_uarch_kaby_lake;
+ case 0x66: // Cannon Lake (Core i3-8121U)
+ return cpuinfo_uarch_palm_cove;
+ case 0x6A: // Ice Lake-SP
+ case 0x6C: // Ice Lake-DE
+ case 0x7D: // Ice Lake-Y
+ case 0x7E: // Ice Lake-U
+ return cpuinfo_uarch_sunny_cove;
/* Low-power cores */
case 0x1C: // Diamondville, Silverthorne, Pineview
@@ -90,18 +96,20 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
case 0x35: // Cloverview
case 0x36: // Cedarview, Centerton
return cpuinfo_uarch_saltwell;
- case 0x37:
- case 0x4A:
- case 0x4D:
+ case 0x37: // Bay Trail
+ case 0x4A: // Merrifield
+ case 0x4D: // Avoton, Rangeley
case 0x5A: // Moorefield
case 0x5D: // SoFIA
return cpuinfo_uarch_silvermont;
- case 0x4C: // Braswell
- case 0x5F: // Denverton
+ case 0x4C: // Braswell, Cherry Trail
case 0x75: // Spreadtrum SC9853I-IA
- case 0x7A: // Goldmont+
return cpuinfo_uarch_airmont;
-
+ case 0x5C: // Apollo Lake
+ case 0x5F: // Denverton
+ return cpuinfo_uarch_goldmont;
+ case 0x7A: // Gemini Lake
+ return cpuinfo_uarch_goldmont_plus;
/* Knights-series cores */
case 0x57:
return cpuinfo_uarch_knights_landing;
@@ -190,7 +198,15 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
return cpuinfo_uarch_jaguar;
}
case 0x17:
- return cpuinfo_uarch_zen;
+ switch (model_info->model) {
+ case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
+ case 0x08: // 12 nm Pinnacle Ridge
+ case 0x11: // 14 nm Raven Ridge
+ case 0x18: // 12 nm Picasso
+ return cpuinfo_uarch_zen;
+ case 0x71: // Matisse
+ return cpuinfo_uarch_zen2;
+ }
}
break;
default:
diff --git src/x86/windows/init.c src/x86/windows/init.c
index eb3498a..7a2090e 100644
--- src/x86/windows/init.c
+++ src/x86/windows/init.c
@@ -571,6 +571,8 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
cpuinfo_clusters_count = packages_count;
cpuinfo_packages_count = packages_count;
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
MemoryBarrier();
cpuinfo_is_initialized = true;
diff --git tools/cache-info.c tools/cache-info.c
index ba0706f..05f69ee 100644
--- tools/cache-info.c
+++ tools/cache-info.c
@@ -60,6 +60,8 @@ int main(int argc, char** argv) {
fprintf(stderr, "failed to initialize CPU information\n");
exit(EXIT_FAILURE);
}
+ printf("Max cache size (upper bound): %"PRIu32" bytes\n", cpuinfo_get_max_cache_size());
+
if (cpuinfo_get_l1i_caches_count() != 0 && (cpuinfo_get_l1i_cache(0)->flags & CPUINFO_CACHE_UNIFIED) == 0) {
report_cache(cpuinfo_get_l1i_caches_count(), cpuinfo_get_l1i_cache(0), 1, "instruction");
}
diff --git tools/cpu-info.c tools/cpu-info.c
index caef424..7fa5187 100644
--- tools/cpu-info.c
+++ tools/cpu-info.c
@@ -73,8 +73,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Broadwell";
case cpuinfo_uarch_sky_lake:
return "Sky Lake";
- case cpuinfo_uarch_kaby_lake:
- return "Kaby Lake";
+ case cpuinfo_uarch_palm_cove:
+ return "Palm Cove";
+ case cpuinfo_uarch_sunny_cove:
+ return "Sunny Cove";
case cpuinfo_uarch_willamette:
return "Willamette";
case cpuinfo_uarch_prescott:
@@ -87,6 +89,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Silvermont";
case cpuinfo_uarch_airmont:
return "Airmont";
+ case cpuinfo_uarch_goldmont:
+ return "Goldmont";
+ case cpuinfo_uarch_goldmont_plus:
+ return "Goldmont Plus";
case cpuinfo_uarch_knights_ferry:
return "Knights Ferry";
case cpuinfo_uarch_knights_corner:
@@ -117,6 +123,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Excavator";
case cpuinfo_uarch_zen:
return "Zen";
+ case cpuinfo_uarch_zen2:
+ return "Zen 2";
case cpuinfo_uarch_geode:
return "Geode";
case cpuinfo_uarch_bobcat:
@@ -157,6 +165,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Cortex-A55";
case cpuinfo_uarch_cortex_a57:
return "Cortex-A57";
+ case cpuinfo_uarch_cortex_a65:
+ return "Cortex-A65";
case cpuinfo_uarch_cortex_a72:
return "Cortex-A72";
case cpuinfo_uarch_cortex_a73:
@@ -165,6 +175,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Cortex-A75";
case cpuinfo_uarch_cortex_a76:
return "Cortex-A76";
+ case cpuinfo_uarch_cortex_a76ae:
+ return "Cortex-A76AE";
+ case cpuinfo_uarch_cortex_a77:
+ return "Cortex-A77";
case cpuinfo_uarch_scorpion:
return "Scorpion";
case cpuinfo_uarch_krait:
@@ -181,12 +195,16 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Denver 2";
case cpuinfo_uarch_carmel:
return "Carmel";
- case cpuinfo_uarch_mongoose_m1:
- return "Mongoose M1";
- case cpuinfo_uarch_mongoose_m2:
- return "Mongoose M2";
- case cpuinfo_uarch_meerkat_m3:
- return "Meerkat M3";
+ case cpuinfo_uarch_exynos_m1:
+ return "Exynos M1";
+ case cpuinfo_uarch_exynos_m2:
+ return "Exynos M2";
+ case cpuinfo_uarch_exynos_m3:
+ return "Exynos M3";
+ case cpuinfo_uarch_exynos_m4:
+ return "Exynos M4";
+ case cpuinfo_uarch_exynos_m5:
+ return "Exynos M5";
case cpuinfo_uarch_swift:
return "Swift";
case cpuinfo_uarch_cyclone:
@@ -258,13 +276,23 @@ int main(int argc, char** argv) {
printf(", %s %s\n", vendor_string, uarch_string);
}
}
- printf("Logical processors:\n");
+ printf("Logical processors");
+ #if defined(__linux__)
+ printf(" (System ID)");
+ #endif
+ printf(":\n");
for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
const struct cpuinfo_processor* processor = cpuinfo_get_processor(i);
+ printf("\t%"PRIu32"", i);
+
+ #if defined(__linux__)
+ printf(" (%"PRId32")", processor->linux_id);
+ #endif
+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
- printf("\t%"PRIu32": APIC ID 0x%08"PRIx32"\n", i, processor->apic_id);
+ printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id);
#else
- printf("\t%"PRIu32"\n", i);
+ printf("\n");
#endif
}
}
diff --git tools/isa-info.c tools/isa-info.c
index 594c46a..98ef919 100644
--- tools/isa-info.c
+++ tools/isa-info.c
@@ -67,6 +67,8 @@ int main(int argc, char** argv) {
printf("\tAVX512BITALG: %s\n", cpuinfo_has_x86_avx512bitalg() ? "yes" : "no");
printf("\tAVX512VPOPCNTDQ: %s\n", cpuinfo_has_x86_avx512vpopcntdq() ? "yes" : "no");
printf("\tAVX512VNNI: %s\n", cpuinfo_has_x86_avx512vnni() ? "yes" : "no");
+ printf("\tAVX512BF16: %s\n", cpuinfo_has_x86_avx512bf16() ? "yes" : "no");
+ printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");