Apply patch to cpuinfo

Ensure support for recent microarchitectures and instruction sets

PiperOrigin-RevId: 287845050
diff --git a/WORKSPACE b/WORKSPACE
index b79e00b..e87b543 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -72,6 +72,7 @@
         "https://github.com/pytorch/cpuinfo/archive/d5e37adf1406cf899d7d9ec1d317c47506ccb970.tar.gz",
     ],
     build_file = "@//third_party:cpuinfo.BUILD",
+    patches = ["@//third_party:cpuinfo.patch"],
 )
 
 # psimd library, used for fallback 128-bit SIMD micro-kernels
diff --git a/bench/utils.cc b/bench/utils.cc
index 9dc85d7..84424df 100644
--- a/bench/utils.cc
+++ b/bench/utils.cc
@@ -129,9 +129,9 @@
       case cpuinfo_uarch_scorpion:
       case cpuinfo_uarch_krait:
       case cpuinfo_uarch_kryo:
-      case cpuinfo_uarch_mongoose_m1:
-      case cpuinfo_uarch_mongoose_m2:
-      case cpuinfo_uarch_meerkat_m3:
+      case cpuinfo_uarch_exynos_m1:
+      case cpuinfo_uarch_exynos_m2:
+      case cpuinfo_uarch_exynos_m3:
         // cpuinfo-detected cache size always correct.
         break;
       case cpuinfo_uarch_cortex_a5:
diff --git a/cmake/DownloadCpuinfo.cmake b/cmake/DownloadCpuinfo.cmake
index 2e0a5d9..36790af 100644
--- a/cmake/DownloadCpuinfo.cmake
+++ b/cmake/DownloadCpuinfo.cmake
@@ -18,6 +18,7 @@
   BINARY_DIR "${CMAKE_BINARY_DIR}/cpuinfo"
   CONFIGURE_COMMAND ""
   BUILD_COMMAND ""
+  PATCH_COMMAND patch -p0 -i ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cpuinfo.patch
   INSTALL_COMMAND ""
   TEST_COMMAND ""
 )
diff --git a/cmake/cpuinfo.patch b/cmake/cpuinfo.patch
new file mode 100644
index 0000000..ca3caa9
--- /dev/null
+++ b/cmake/cpuinfo.patch
@@ -0,0 +1,1475 @@
+diff --git CMakeLists.txt CMakeLists.txt
+index e594def..cab4d05 100644
+--- CMakeLists.txt
++++ CMakeLists.txt
+@@ -119,7 +119,8 @@ ENDIF()
+ # ---[ cpuinfo library
+ SET(CPUINFO_SRCS
+   src/init.c
+-  src/api.c)
++  src/api.c
++  src/cache.c)
+ 
+ IF(CPUINFO_SUPPORTED_PLATFORM)
+   IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
+diff --git LICENSE LICENSE
+index 4910bfe..3f9a4f0 100644
+--- LICENSE
++++ LICENSE
+@@ -1,3 +1,4 @@
++Copyright (c) 2019 Google LLC
+ Copyright (c) 2017-2018 Facebook Inc.
+ Copyright (C) 2012-2017 Georgia Institute of Technology
+ Copyright (C) 2010-2012 Marat Dukhan
+diff --git include/cpuinfo.h include/cpuinfo.h
+index 7d5833f..9938d2b 100644
+--- include/cpuinfo.h
++++ include/cpuinfo.h
+@@ -38,10 +38,18 @@
+ 	#define CPUINFO_ARCH_PNACL 1
+ #endif
+ 
+-#if defined(EMSCRIPTEN)
++#if defined(__asmjs__)
+ 	#define CPUINFO_ARCH_ASMJS 1
+ #endif
+ 
++#if defined(__wasm__)
++	#if defined(__wasm_simd128__)
++		#define CPUINFO_ARCH_WASMSIMD 1
++	#else
++		#define CPUINFO_ARCH_WASM 1
++	#endif
++#endif
++
+ #if CPUINFO_ARCH_X86 && defined(_MSC_VER)
+ 	#define CPUINFO_ABI __cdecl
+ #elif CPUINFO_ARCH_X86 && defined(__GNUC__)
+@@ -80,6 +88,14 @@
+ 	#define CPUINFO_ARCH_ASMJS 0
+ #endif
+ 
++#ifndef CPUINFO_ARCH_WASM
++	#define CPUINFO_ARCH_WASM 0
++#endif
++
++#ifndef CPUINFO_ARCH_WASMSIMD
++	#define CPUINFO_ARCH_WASMSIMD 0
++#endif
++
+ #define CPUINFO_CACHE_UNIFIED          0x00000001
+ #define CPUINFO_CACHE_INCLUSIVE        0x00000002
+ #define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004
+@@ -278,10 +294,14 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_haswell      = 0x00100208,
+ 	/** Intel Broadwell microarchitecture. */
+ 	cpuinfo_uarch_broadwell    = 0x00100209,
+-	/** Intel Sky Lake microarchitecture. */
++	/** Intel Sky Lake microarchitecture (14 nm, including Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */
+ 	cpuinfo_uarch_sky_lake     = 0x0010020A,
+-	/** Intel Kaby Lake microarchitecture. */
+-	cpuinfo_uarch_kaby_lake    = 0x0010020B,
++	/** DEPRECATED (Intel Kaby Lake microarchitecture). */
++	cpuinfo_uarch_kaby_lake    = 0x0010020A,
++	/** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */
++	cpuinfo_uarch_palm_cove    = 0x0010020B,
++	/** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */
++	cpuinfo_uarch_sunny_cove   = 0x0010020C,
+ 
+ 	/** Pentium 4 with Willamette, Northwood, or Foster cores. */
+ 	cpuinfo_uarch_willamette = 0x00100300,
+@@ -289,13 +309,17 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_prescott   = 0x00100301,
+ 
+ 	/** Intel Atom on 45 nm process. */
+-	cpuinfo_uarch_bonnell    = 0x00100400,
++	cpuinfo_uarch_bonnell       = 0x00100400,
+ 	/** Intel Atom on 32 nm process. */
+-	cpuinfo_uarch_saltwell   = 0x00100401,
++	cpuinfo_uarch_saltwell      = 0x00100401,
+ 	/** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */
+-	cpuinfo_uarch_silvermont = 0x00100402,
++	cpuinfo_uarch_silvermont    = 0x00100402,
+ 	/** Intel Airmont microarchitecture (14 nm out-of-order Atom). */
+-	cpuinfo_uarch_airmont    = 0x00100403,
++	cpuinfo_uarch_airmont       = 0x00100403,
++	/** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */
++	cpuinfo_uarch_goldmont      = 0x00100404,
++	/** Intel Goldmont Plus microarchitecture (Gemini Lake). */
++	cpuinfo_uarch_goldmont_plus = 0x00100405,
+ 
+ 	/** Intel Knights Ferry HPC boards. */
+ 	cpuinfo_uarch_knights_ferry   = 0x00100500,
+@@ -335,8 +359,10 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_steamroller = 0x00200107,
+ 	/** AMD Excavator microarchitecture (Carizzo APUs). */
+ 	cpuinfo_uarch_excavator   = 0x00200108,
+-	/** AMD Zen microarchitecture (Ryzen CPUs). */
++	/** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */
+ 	cpuinfo_uarch_zen         = 0x00200109,
++	/** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */
++	cpuinfo_uarch_zen2        = 0x0020010A,
+ 
+ 	/** NSC Geode and AMD Geode GX and LX. */
+ 	cpuinfo_uarch_geode  = 0x00200200,
+@@ -370,23 +396,34 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_cortex_a17 = 0x00300217,
+ 
+ 	/** ARM Cortex-A32. */
+-	cpuinfo_uarch_cortex_a32 = 0x00300332,
++	cpuinfo_uarch_cortex_a32   = 0x00300332,
+ 	/** ARM Cortex-A35. */
+-	cpuinfo_uarch_cortex_a35 = 0x00300335,
++	cpuinfo_uarch_cortex_a35   = 0x00300335,
+ 	/** ARM Cortex-A53. */
+-	cpuinfo_uarch_cortex_a53 = 0x00300353,
++	cpuinfo_uarch_cortex_a53   = 0x00300353,
+ 	/** ARM Cortex-A55. */
+-	cpuinfo_uarch_cortex_a55 = 0x00300355,
++	cpuinfo_uarch_cortex_a55   = 0x00300355,
+ 	/** ARM Cortex-A57. */
+-	cpuinfo_uarch_cortex_a57 = 0x00300357,
++	cpuinfo_uarch_cortex_a57   = 0x00300357,
++	/** ARM Cortex-A65. */
++	cpuinfo_uarch_cortex_a65   = 0x00300365,
+ 	/** ARM Cortex-A72. */
+-	cpuinfo_uarch_cortex_a72 = 0x00300372,
++	cpuinfo_uarch_cortex_a72   = 0x00300372,
+ 	/** ARM Cortex-A73. */
+-	cpuinfo_uarch_cortex_a73 = 0x00300373,
++	cpuinfo_uarch_cortex_a73   = 0x00300373,
+ 	/** ARM Cortex-A75. */
+-	cpuinfo_uarch_cortex_a75 = 0x00300375,
++	cpuinfo_uarch_cortex_a75   = 0x00300375,
+ 	/** ARM Cortex-A76. */
+-	cpuinfo_uarch_cortex_a76 = 0x00300376,
++	cpuinfo_uarch_cortex_a76   = 0x00300376,
++	/** ARM Cortex-A76AE. */
++	cpuinfo_uarch_cortex_a76ae = 0x00300378,
++	/** ARM Cortex-A77. */
++	cpuinfo_uarch_cortex_a77   = 0x00300377,
++
++	/** ARM Neoverse N1. */
++	cpuinfo_uarch_neoverse_n1  = 0x00300400,
++	/** ARM Neoverse E1. */
++	cpuinfo_uarch_neoverse_e1  = 0x00300401,
+ 
+ 	/** Qualcomm Scorpion. */
+ 	cpuinfo_uarch_scorpion = 0x00400100,
+@@ -406,12 +443,22 @@ enum cpuinfo_uarch {
+ 	/** Nvidia Carmel. */
+ 	cpuinfo_uarch_carmel   = 0x00500102,
+ 
+-	/** Samsung Mongoose M1 (Exynos 8890 big cores). */
++	/** Samsung Exynos M1 (Exynos 8890 big cores). */
++	cpuinfo_uarch_exynos_m1 = 0x00600100,
++	/** Samsung Exynos M2 (Exynos 8895 big cores). */
++	cpuinfo_uarch_exynos_m2 = 0x00600101,
++	/** Samsung Exynos M3 (Exynos 9810 big cores). */
++	cpuinfo_uarch_exynos_m3  = 0x00600102,
++	/** Samsung Exynos M4 (Exynos 9820 big cores). */
++	cpuinfo_uarch_exynos_m4  = 0x00600103,
++	/** Samsung Exynos M5 (Exynos 9830 big cores). */
++	cpuinfo_uarch_exynos_m5  = 0x00600104,
++
++	/* Old names for Exynos. */
+ 	cpuinfo_uarch_mongoose_m1 = 0x00600100,
+-	/** Samsung Mongoose M2 (Exynos 8895 big cores). */
+ 	cpuinfo_uarch_mongoose_m2 = 0x00600101,
+-	/** Samsung Meerkat M3 (Exynos 9810 big cores). */
+ 	cpuinfo_uarch_meerkat_m3  = 0x00600102,
++	cpuinfo_uarch_meerkat_m4  = 0x00600103,
+ 
+ 	/** Apple A6 and A6X processors. */
+ 	cpuinfo_uarch_swift     = 0x00700100,
+@@ -640,6 +687,8 @@ void CPUINFO_ABI cpuinfo_deinitialize(void);
+ 		bool avx512bitalg;
+ 		bool avx512vpopcntdq;
+ 		bool avx512vnni;
++		bool avx512bf16;
++		bool avx512vp2intersect;
+ 		bool avx512_4vnniw;
+ 		bool avx512_4fmaps;
+ 		bool hle;
+@@ -1110,6 +1159,22 @@ static inline bool cpuinfo_has_x86_avx512vnni(void) {
+ 	#endif
+ }
+ 
++static inline bool cpuinfo_has_x86_avx512bf16(void) {
++	#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
++		return cpuinfo_isa.avx512bf16;
++	#else
++		return false;
++	#endif
++}
++
++static inline bool cpuinfo_has_x86_avx512vp2intersect(void) {
++	#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
++		return cpuinfo_isa.avx512vp2intersect;
++	#else
++		return false;
++	#endif
++}
++
+ static inline bool cpuinfo_has_x86_avx512_4vnniw(void) {
+ 	#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ 		return cpuinfo_isa.avx512_4vnniw;
+@@ -1682,6 +1747,11 @@ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void);
+ uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void);
+ uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void);
+ 
++/**
++ * Returns an upper bound on cache size, in bytes.
++ */
++uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void);
++
+ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void);
+ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
+ 
+diff --git src/api.c src/api.c
+index 98b5805..83744f5 100644
+--- src/api.c
++++ src/api.c
+@@ -18,6 +18,7 @@ uint32_t cpuinfo_cores_count = 0;
+ uint32_t cpuinfo_clusters_count = 0;
+ uint32_t cpuinfo_packages_count = 0;
+ uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
++uint32_t cpuinfo_max_cache_size = 0;
+ 
+ 
+ const struct cpuinfo_processor* cpuinfo_get_processors(void) {
+diff --git src/arm/api.h src/arm/api.h
+index 11e588b..69274bc 100644
+--- src/arm/api.h
++++ src/arm/api.h
+@@ -104,6 +104,9 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+ 	struct cpuinfo_cache l1d[restrict static 1],
+ 	struct cpuinfo_cache l2[restrict static 1],
+ 	struct cpuinfo_cache l3[restrict static 1]);
++
++CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
++	const struct cpuinfo_processor processor[restrict static 1]);
+ #else /* defined(__cplusplus) */
+ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+ 	enum cpuinfo_uarch uarch,
+diff --git src/arm/cache.c src/arm/cache.c
+index 5ada7d9..ccadeb4 100644
+--- src/arm/cache.c
++++ src/arm/cache.c
+@@ -1,10 +1,12 @@
+ #include <stdint.h>
+ 
+ #include <cpuinfo.h>
++#include <cpuinfo/internal-api.h>
+ #include <cpuinfo/log.h>
+ #include <arm/api.h>
+ #include <arm/midr.h>
+ 
++
+ void cpuinfo_arm_decode_cache(
+ 	enum cpuinfo_uarch uarch,
+ 	uint32_t cluster_cores,
+@@ -109,7 +111,7 @@ void cpuinfo_arm_decode_cache(
+ 			 *      memory accesses and has been optimized for use with the Cortex-A5 processor.
+ 			 * 8.1.7. Exclusive L2 cache
+ 			 *    The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
+-			 *    This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. 
++			 *    This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
+ 			 *
+ 			 *  +--------------------+-----------+-----------+----------+-----------+
+ 			 *  | Processor model    | L1D cache | L1I cache | L2 cache | Reference |
+@@ -698,7 +700,7 @@ void cpuinfo_arm_decode_cache(
+ 			 * [3] https://en.wikichip.org/wiki/hisilicon/kirin/980
+ 			 */
+ 			if (midr_is_qualcomm_cortex_a55_silver(midr)) {
+-				/* Qualcomm-modified Cortex-A55 in Snapdragon 710 / 845 */
++				/* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */
+ 				uint32_t l3_size = 1024 * 1024;
+ 				switch (chipset->series) {
+ 					case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
+@@ -827,6 +829,62 @@ void cpuinfo_arm_decode_cache(
+ 				.flags = CPUINFO_CACHE_INCLUSIVE
+ 			};
+ 			break;
++		case cpuinfo_uarch_cortex_a65:
++		{
++			/*
++			 * ARM Cortex‑A65 Core Technical Reference Manual
++			 * A6.1. About the L1 memory system
++			 *   The L1 memory system enhances the performance and power efficiency in the Cortex‑A65 core.
++			 *   It consists of separate instruction and data caches. You can configure instruction and data caches
++			 *   independently during implementation to sizes of 32KB or 64KB.
++			 *
++			 *   L1 instruction-side memory system
++			 *   The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are:
++			 *    - 64-byte instruction side cache line length.
++			 *    - 4-way set associative L1 instruction cache.
++			 *
++			 *   L1 data-side memory system
++			 *    - 64-byte data side cache line length.
++			 *    - 4-way set associative L1 data cache.
++			 *
++			 * A7.1 About the L2 memory system
++			 *   The Cortex‑A65 L2 memory system is required to interface the Cortex‑A65 cores to the L3 memory system.
++			 *   The L2 memory subsystem consists of:
++			 *    - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB.
++			 *      Cache lines have a fixed length of 64 bytes.
++			 *
++			 *   The main features of the L2 memory system are:
++			 *    - Strictly exclusive with L1 data cache.
++			 *    - Pseudo-inclusive with L1 instruction cache.
++			 *    - Private per-core unified L2 cache.
++			 */
++			const uint32_t l1_size = 32 * 1024;
++			const uint32_t l2_size = 128 * 1024;
++			const uint32_t l3_size = 512 * 1024;
++			*l1i = (struct cpuinfo_cache) {
++				.size = l1_size,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l1d = (struct cpuinfo_cache) {
++				.size = l1_size,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l2 = (struct cpuinfo_cache) {
++				.size = l2_size,
++				.associativity = 4,
++				.line_size = 64,
++				.flags = CPUINFO_CACHE_INCLUSIVE
++			};
++			*l3 = (struct cpuinfo_cache) {
++				.size = l3_size,
++				/* DynamIQ */
++				.associativity = 16,
++				.line_size = 64,
++			};
++			break;
++		}
+ 		case cpuinfo_uarch_cortex_a72:
+ 		{
+ 			/*
+@@ -1047,6 +1105,7 @@ void cpuinfo_arm_decode_cache(
+ 			break;
+ 		}
+ 		case cpuinfo_uarch_cortex_a76:
++		case cpuinfo_uarch_cortex_a76ae:
+ 		{
+ 			/*
+ 			 * ARM Cortex-A76 Core Technical Reference Manual
+@@ -1119,6 +1178,57 @@ void cpuinfo_arm_decode_cache(
+ 			};
+ 			break;
+ 		}
++		case cpuinfo_uarch_cortex_a77:
++		{
++			/*
++			 * ARM Cortex-A77 Core Technical Reference Manual
++			 * A6.1. About the L1 memory system
++			 *   The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
++			 *
++			 * A6.1.1 L1 instruction-side memory system
++			 *   The L1 instruction memory system has the following key features:
++			 *    - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
++			 *      Physically Tagged (PIPT) 4-way set-associative L1 data cache.
++			 *    - Fixed cache line length of 64 bytes.
++			 *
++			 * A6.1.2 L1 data-side memory system
++			 *   The L1 data memory system has the following features:
++			 *    - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
++			 *      Physically Tagged (PIPT) 4-way set-associative L1 data cache.
++			 *    - Fixed cache line length of 64 bytes.
++			 *    - Pseudo-LRU cache replacement policy.
++			 *
++			 * A7.1 About the L2 memory system
++			 *   The L2 memory subsystem consist of:
++			 *    - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines
++			 *      have a fixed length of 64 bytes.
++			 *    - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache.
++			 */
++			const uint32_t l2_size = 256 * 1024;
++			const uint32_t l3_size = 1024 * 1024;
++			*l1i = (struct cpuinfo_cache) {
++				.size = 64 * 1024,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l1d = (struct cpuinfo_cache) {
++				.size = 64 * 1024,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l2 = (struct cpuinfo_cache) {
++				.size = l2_size,
++				.associativity = 8,
++				.line_size = 64,
++				.flags = CPUINFO_CACHE_INCLUSIVE,
++			};
++			*l3 = (struct cpuinfo_cache) {
++				.size = l3_size,
++				.associativity = 16,
++				.line_size = 64,
++			};
++			break;
++		}
+ #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
+ 		case cpuinfo_uarch_scorpion:
+ 			/*
+@@ -1248,8 +1358,8 @@ void cpuinfo_arm_decode_cache(
+ 				.line_size = 64
+ 			};
+ 			break;
+-		case cpuinfo_uarch_mongoose_m1:
+-		case cpuinfo_uarch_mongoose_m2:
++		case cpuinfo_uarch_exynos_m1:
++		case cpuinfo_uarch_exynos_m2:
+ 			/*
+ 			 * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
+ 			 *    namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
+@@ -1283,7 +1393,7 @@ void cpuinfo_arm_decode_cache(
+ 				.line_size = 64
+ 			};
+ 			break;
+-		case cpuinfo_uarch_meerkat_m3:
++		case cpuinfo_uarch_exynos_m3:
+ 			/*
+ 			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
+ 			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | L3 cache | Reference  |
+@@ -1294,19 +1404,19 @@ void cpuinfo_arm_decode_cache(
+ 			 * [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results
+ 			 */
+ 			*l1i = (struct cpuinfo_cache) {
+-				.size = 64 * 1024 /* assume same as in Mongoose cores */,
+-				.associativity = 4 /* assume same as in Mongoose cores */,
+-				.line_size = 128 /* assume same as in Mongoose cores */
++				.size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */,
++				.associativity = 4 /* assume same as in Exynos M1/M2 cores */,
++				.line_size = 128 /* assume same as in Exynos M1/M2 cores */
+ 			};
+ 			*l1d = (struct cpuinfo_cache) {
+ 				.size = 64 * 1024,
+-				.associativity = 8 /* assume same as in Mongoose cores */,
+-				.line_size = 64 /* assume same as in Mongoose cores */,
++				.associativity = 8 /* assume same as in Exynos M1/M2 cores */,
++				.line_size = 64 /* assume same as in Exynos M1/M2 cores */,
+ 			};
+ 			*l2 = (struct cpuinfo_cache) {
+ 				.size = 512 * 1024,
+-				.associativity = 16 /* assume same as in Mongoose cores */,
+-				.line_size = 64 /* assume same as in Mongoose cores */,
++				.associativity = 16 /* assume same as in Exynos M1/M2 cores */,
++				.line_size = 64 /* assume same as in Exynos M1/M2 cores */,
+ 			};
+ 			*l3 = (struct cpuinfo_cache) {
+ 				.size = 4 * 1024 * 1024,
+@@ -1393,3 +1503,124 @@ void cpuinfo_arm_decode_cache(
+ 		}
+ 	}
+ }
++
++uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) {
++	/*
++	 * There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo
++	 * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum.
++	 */
++	switch (processor->core->uarch) {
++		case cpuinfo_uarch_xscale:
++		case cpuinfo_uarch_arm11:
++		case cpuinfo_uarch_scorpion:
++		case cpuinfo_uarch_krait:
++		case cpuinfo_uarch_kryo:
++		case cpuinfo_uarch_exynos_m1:
++		case cpuinfo_uarch_exynos_m2:
++		case cpuinfo_uarch_exynos_m3:
++			/* cpuinfo-detected cache size always correct */
++			return cpuinfo_compute_max_cache_size(processor);
++		case cpuinfo_uarch_cortex_a5:
++			/* Max observed (NXP Vybrid SoC) */
++			return 512 * 1024;
++		case cpuinfo_uarch_cortex_a7:
++			/*
++			 * Cortex-A7 MPCore Technical Reference Manual:
++			 * 7.1. About the L2 Memory system
++			 *   The L2 memory system consists of an:
++			 *    - Optional tightly-coupled L2 cache that includes:
++			 *      - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
++			 */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a8:
++			/*
++			 * Cortex-A8 Technical Reference Manual:
++			 * 8.1. About the L2 memory system
++			 *   The key features of the L2 memory system include:
++			 *    - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
++			 */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a9:
++			/* Max observed (e.g. Exynos 4212) */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a12:
++		case cpuinfo_uarch_cortex_a17:
++			/*
++			 * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
++			 * 7.1. About the L2 Memory system
++			 *   The key features of the L2 memory system include:
++			 *    - An integrated L2 cache:
++			 *      - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
++			 */
++			return 8 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a15:
++			/*
++			 * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
++			 * 7.1. About the L2 memory system
++			 *   The features of the L2 memory system include:
++			 *    - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
++			 */
++			return 4 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a35:
++			/*
++			 * ARM Cortex‑A35 Processor Technical Reference Manual:
++			 * 7.1 About the L2 memory system
++			 *   L2 cache
++			 *    - Further features of the L2 cache are:
++			 *      - Configurable size of 128KB, 256KB, 512KB, and 1MB.
++			 */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a53:
++			/*
++			 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
++			 * 7.1. About the L2 memory system
++			 *   The L2 memory system consists of an:
++			 *    - Optional tightly-coupled L2 cache that includes:
++			 *      - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
++			 */
++			return 2 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a57:
++			/*
++			 * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
++			 * 7.1 About the L2 memory system
++			 *   The features of the L2 memory system include:
++			 *    - Configurable L2 cache size of 512KB, 1MB, and 2MB.
++			 */
++			return 2 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a72:
++			/*
++			 * ARM Cortex-A72 MPCore Processor Technical Reference Manual:
++			 * 7.1 About the L2 memory system
++			 *   The features of the L2 memory system include:
++			 *    - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
++			 */
++			return 4 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a73:
++			/*
++			 * ARM Cortex‑A73 MPCore Processor Technical Reference Manual
++			 * 7.1 About the L2 memory system
++			 *   The L2 memory system consists of:
++			 *    - A tightly-integrated L2 cache with:
++			 *       - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
++			 */
++			return 8 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a55:
++		case cpuinfo_uarch_cortex_a75:
++		case cpuinfo_uarch_cortex_a76:
++		case cpuinfo_uarch_exynos_m4:
++		default:
++			/*
++			 * ARM DynamIQ Shared Unit Technical Reference Manual
++			 * 1.3 Implementation options
++			 *   L3_CACHE_SIZE
++			 *    - 256KB
++			 *    - 512KB
++			 *    - 1024KB
++			 *    - 1536KB
++			 *    - 2048KB
++			 *    - 3072KB
++			 *    - 4096KB
++			 */
++			return 4 * 1024 * 1024;
++	}
++}
+diff --git src/arm/linux/init.c src/arm/linux/init.c
+index a297f63..f0c432c 100644
+--- src/arm/linux/init.c
++++ src/arm/linux/init.c
+@@ -678,6 +678,8 @@ void cpuinfo_arm_linux_init(void) {
+ 	cpuinfo_cache_count[cpuinfo_cache_level_2]  = l2_count;
+ 	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
+ 
++	cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/arm/linux/midr.c src/arm/linux/midr.c
+index 668fc72..2c3116b 100644
+--- src/arm/linux/midr.c
++++ src/arm/linux/midr.c
+@@ -220,7 +220,7 @@ static const struct cluster_config cluster_configs[] = {
+ 		.model = UINT16_C(7420),
+ 		.clusters = 2,
+ 		.cluster_cores = {
+-			[0] = 4, 
++			[0] = 4,
+ 			[1] = 4,
+ 		},
+ 		.cluster_midr = {
+@@ -229,7 +229,7 @@ static const struct cluster_config cluster_configs[] = {
+ 		},
+ 	},
+ 	{
+-		/* Exynos 8890: 4x Mongoose + 4x Cortex-A53 */
++		/* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */
+ 		.cores = 8,
+ 		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+ 		.model = UINT16_C(8890),
+@@ -695,7 +695,7 @@ static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+ 		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ 			if (processors[i].package_leader_id == i) {
+ 				if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+-					midr = processors[i].midr;	
++					midr = processors[i].midr;
+ 				} else {
+ 					cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, i, midr);
+ 					/* To be consistent, we copy the MIDR entirely, rather than by parts */
+@@ -836,7 +836,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr(
+ 			 *    - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value.
+ 			 *    - Clusters following any reported MIDR value to have that MIDR value.
+ 			 */
+-			
++
+ 			if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
+ 				chipset, clusters_count, cluster_leaders, usable_processors, processors, true))
+ 			{
+diff --git src/arm/mach/init.c src/arm/mach/init.c
+index 5b14b49..e64cc18 100644
+--- src/arm/mach/init.c
++++ src/arm/mach/init.c
+@@ -562,6 +562,8 @@ void cpuinfo_arm_mach_init(void) {
+ 	cpuinfo_clusters_count = num_clusters;
+ 	cpuinfo_packages_count = mach_topology.packages;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/arm/midr.h src/arm/midr.h
+index 6363ed7..d5a28e3 100644
+--- src/arm/midr.h
++++ src/arm/midr.h
+@@ -33,31 +33,31 @@
+ #define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
+ #define CPUINFO_ARM_MIDR_KRYO_GOLD       UINT32_C(0x510F2050)
+ #define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
+-#define CPUINFO_ARM_MIDR_MONGOOSE        UINT32_C(0x530F0010)
++#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2    UINT32_C(0x530F0010)
+ #define CPUINFO_ARM_MIDR_DENVER2         UINT32_C(0x4E0F0030)
+ 
+ inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
+-	return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
+ 		((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
+ }
+ 
+ inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
+-	return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
+ 		((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK);
+ }
+ 
+ inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
+-	return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
+ 		((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK);
+ }
+ 
+ inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
+-	return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
+ 		((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
+ }
+ 
+ inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
+-	return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
+ 		((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK);
+ }
+ 
+@@ -171,13 +171,20 @@ inline static bool midr_is_kryo_gold(uint32_t midr) {
+ inline static uint32_t midr_score_core(uint32_t midr) {
+ 	const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+ 	switch (midr & core_mask) {
++		case UINT32_C(0x53000040): /* Exynos M5 */
++		case UINT32_C(0x53000030): /* Exynos M4 */
++			/* These cores are in big role w.r.t. Cortex-A75 or Cortex-A76 */
++			return 6;
+ 		case UINT32_C(0x4E000030): /* Denver 2 */
+-		case UINT32_C(0x53000010): /* Mongoose */
+-		case UINT32_C(0x53000020): /* Meerkat */
++		case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
++		case UINT32_C(0x53000020): /* Exynos M3 */
++		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
+ 		case UINT32_C(0x51008020): /* Kryo 385 Gold */
+ 		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+ 		case UINT32_C(0x51002050): /* Kryo Gold */
+ 		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
++		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
++		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ 		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+ 		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+ 		case UINT32_C(0x4100D090): /* Cortex-A73 */
+@@ -191,12 +198,14 @@ inline static uint32_t midr_score_core(uint32_t midr) {
+ 		case UINT32_C(0x4100D070): /* Cortex-A57 */
+ 			/* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */
+ 			return 4;
++		case UINT32_C(0x4100D060): /* Cortex-A65 */
+ 		case UINT32_C(0x4100D050): /* Cortex-A55 */
+ 		case UINT32_C(0x4100D030): /* Cortex-A53 */
+ 			/* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */
+ 			return 2;
+ 		case UINT32_C(0x4100D040): /* Cortex-A35 */
+ 		case UINT32_C(0x4100C070): /* Cortex-A7 */
++		case UINT32_C(0x51008050): /* Kryo 485 Silver */
+ 		case UINT32_C(0x51008030): /* Kryo 385 Silver */
+ 		case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
+ 		case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
+@@ -215,7 +224,7 @@ inline static uint32_t midr_score_core(uint32_t midr) {
+ }
+ 
+ inline static uint32_t midr_little_core_for_big(uint32_t midr) {
+-	const uint32_t core_mask = 
++	const uint32_t core_mask =
+ 		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+ 	switch (midr & core_mask) {
+ 		case CPUINFO_ARM_MIDR_CORTEX_A75:
+@@ -223,7 +232,7 @@ inline static uint32_t midr_little_core_for_big(uint32_t midr) {
+ 		case CPUINFO_ARM_MIDR_CORTEX_A73:
+ 		case CPUINFO_ARM_MIDR_CORTEX_A72:
+ 		case CPUINFO_ARM_MIDR_CORTEX_A57:
+-		case CPUINFO_ARM_MIDR_MONGOOSE:
++		case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
+ 			return CPUINFO_ARM_MIDR_CORTEX_A53;
+ 		case CPUINFO_ARM_MIDR_CORTEX_A17:
+ 		case CPUINFO_ARM_MIDR_CORTEX_A15:
+diff --git src/arm/uarch.c src/arm/uarch.c
+index d7d2c63..a38250a 100644
+--- src/arm/uarch.c
++++ src/arm/uarch.c
+@@ -60,6 +60,9 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 				case 0xD05:
+ 					*uarch = cpuinfo_uarch_cortex_a55;
+ 					break;
++				case 0xD06:
++					*uarch = cpuinfo_uarch_cortex_a65;
++					break;
+ 				case 0xD07:
+ 					*uarch = cpuinfo_uarch_cortex_a57;
+ 					break;
+@@ -75,6 +78,22 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 				case 0xD0B:
+ 					*uarch = cpuinfo_uarch_cortex_a76;
+ 					break;
++#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
++				case 0xD0C:
++					*uarch = cpuinfo_uarch_neoverse_n1;
++					break;
++#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
++				case 0xD0D:
++					*uarch = cpuinfo_uarch_cortex_a77;
++					break;
++				case 0xD0E:
++					*uarch = cpuinfo_uarch_cortex_a76ae;
++					break;
++#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
++				case 0xD4A:
++					*uarch = cpuinfo_uarch_neoverse_e1;
++					break;
++#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+ 				default:
+ 					switch (midr_get_part(midr) >> 8) {
+ #if CPUINFO_ARCH_ARM
+@@ -242,10 +261,14 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 					*vendor = cpuinfo_vendor_arm;
+ 					*uarch = cpuinfo_uarch_cortex_a55;
+ 					break;
+-				case 0x804:
++				case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
+ 					*vendor = cpuinfo_vendor_arm;
+ 					*uarch = cpuinfo_uarch_cortex_a76;
+ 					break;
++				case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */
++					*vendor = cpuinfo_vendor_arm;
++					*uarch = cpuinfo_uarch_cortex_a55;
++					break;
+ #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ 				case 0xC00:
+ 					*uarch = cpuinfo_uarch_falkor;
+@@ -263,27 +286,43 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 			switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+ 				case 0x00100010:
+ 					/*
+-					 * Exynos 8890 MIDR = 0x531F0011, assume Mongoose M1 has:
++					 * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has:
+ 					 * - CPU variant 0x1
+ 					 * - CPU part 0x001
+ 					 */
+-					*uarch = cpuinfo_uarch_mongoose_m1;
++					*uarch = cpuinfo_uarch_exynos_m1;
+ 					break;
+ 				case 0x00400010:
+ 					/*
+-					 * Exynos 8895 MIDR = 0x534F0010, assume Mongoose M2 has:
++					 * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has:
+ 					 * - CPU variant 0x4
+ 					 * - CPU part 0x001
+ 					 */
+-					*uarch = cpuinfo_uarch_mongoose_m2;
++					*uarch = cpuinfo_uarch_exynos_m2;
+ 					break;
+ 				case 0x00100020:
+ 					/*
+-					 * Exynos 9810 MIDR = 0x531F0020, assume Meerkat M3 has:
++					 * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has:
+ 					 * - CPU variant 0x1
+ 					 * - CPU part 0x002
+ 					 */
+-					*uarch = cpuinfo_uarch_meerkat_m3;
++					*uarch = cpuinfo_uarch_exynos_m3;
++					break;
++				case 0x00100030:
++					/*
++					 * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has:
++					 * - CPU variant 0x1
++					 * - CPU part 0x003
++					 */
++					*uarch = cpuinfo_uarch_exynos_m4;
++					break;
++				case 0x00100040:
++					/*
++					 * Exynos 990 MIDR = 0x531F0040, assume Exynos M5 has:
++					 * - CPU variant 0x1
++					 * - CPU part 0x004
++					 */
++					*uarch = cpuinfo_uarch_exynos_m5;
+ 					break;
+ 				default:
+ 					cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored",
+diff --git src/cache.c src/cache.c
+new file mode 100644
+index 0000000..b976b87
+--- /dev/null
++++ src/cache.c
+@@ -0,0 +1,18 @@
++#include <stddef.h>
++
++#include <cpuinfo.h>
++#include <cpuinfo/internal-api.h>
++
++
++uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) {
++  if (processor->cache.l4 != NULL) {
++    return processor->cache.l4->size;
++  } else if (processor->cache.l3 != NULL) {
++    return processor->cache.l3->size;
++  } else if (processor->cache.l2 != NULL) {
++    return processor->cache.l2->size;
++  } else if (processor->cache.l1d != NULL) {
++    return processor->cache.l1d->size;
++  }
++  return 0;
++}
+diff --git src/cpuinfo/internal-api.h src/cpuinfo/internal-api.h
+index 6045750..717b810 100644
+--- src/cpuinfo/internal-api.h
++++ src/cpuinfo/internal-api.h
+@@ -31,6 +31,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max];
++extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
+ 
+ CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
+ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
+@@ -40,4 +41,6 @@ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
+ CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
+ CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
+ 
++CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor);
++
+ typedef void (*cpuinfo_processor_callback)(uint32_t);
+diff --git src/x86/isa.c src/x86/isa.c
+index bca1ecd..d27dbca 100644
+--- src/x86/isa.c
++++ src/x86/isa.c
+@@ -42,8 +42,10 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ {
+ 	struct cpuinfo_x86_isa isa = { 0 };
+ 
+-	const struct cpuid_regs structured_feature_info =
++	const struct cpuid_regs structured_feature_info0 =
+ 		(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0};
++	const struct cpuid_regs structured_feature_info1 =
++		(max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0};
+ 
+ 	const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
+ 	const struct cpuid_regs processor_capacity_info =
+@@ -144,9 +146,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * CLFLUSHOPT instruction:
+-	 * - Intel: ebx[bit 23] in structured feature info.
++	 * - Intel: ebx[bit 23] in structured feature info (ecx = 0).
+ 	 */
+-	isa.clflushopt = !!(structured_feature_info.ebx & UINT32_C(0x00800000));
++	isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000));
+ 
+ 	/*
+ 	 * MWAIT/MONITOR instructions:
+@@ -273,9 +275,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * PREFETCHWT1 instruction:
+-	 * - Intel: ecx[bit 0] of structured feature info. Reserved bit on AMD.
++	 * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD.
+ 	 */
+-	isa.prefetchwt1 = !!(structured_feature_info.ecx & UINT32_C(0x00000001));
++	isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001));
+ 
+ #if CPUINFO_ARCH_X86
+ 	/*
+@@ -386,111 +388,123 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * AVX2 instructions:
+-	 * - Intel: ebx[bit 5] in structured feature info.
++	 * - Intel: ebx[bit 5] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx2 = avx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00000020));
++	isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020));
+ 
+ 	/*
+ 	 * AVX512F instructions:
+-	 * - Intel: ebx[bit 16] in structured feature info.
++	 * - Intel: ebx[bit 16] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512f = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00010000));
++	isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
+ 
+ 	/*
+ 	 * AVX512PF instructions:
+-	 * - Intel: ebx[bit 26] in structured feature info.
++	 * - Intel: ebx[bit 26] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512pf = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x04000000));
++	isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000));
+ 
+ 	/*
+ 	 * AVX512ER instructions:
+-	 * - Intel: ebx[bit 27] in structured feature info.
++	 * - Intel: ebx[bit 27] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512er = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x08000000));
++	isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000));
+ 
+ 	/*
+ 	 * AVX512CD instructions:
+-	 * - Intel: ebx[bit 28] in structured feature info.
++	 * - Intel: ebx[bit 28] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512cd = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x10000000));
++	isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000));
+ 
+ 	/*
+ 	 * AVX512DQ instructions:
+-	 * - Intel: ebx[bit 17] in structured feature info.
++	 * - Intel: ebx[bit 17] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512dq = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00020000));
++	isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000));
+ 
+ 	/*
+ 	 * AVX512BW instructions:
+-	 * - Intel: ebx[bit 30] in structured feature info.
++	 * - Intel: ebx[bit 30] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512bw = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x40000000));
++	isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000));
+ 
+ 	/*
+ 	 * AVX512VL instructions:
+-	 * - Intel: ebx[bit 31] in structured feature info.
++	 * - Intel: ebx[bit 31] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vl = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x80000000));
++	isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000));
+ 
+ 	/*
+ 	 * AVX512IFMA instructions:
+-	 * - Intel: ebx[bit 21] in structured feature info.
++	 * - Intel: ebx[bit 21] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512ifma = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00200000));
++	isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000));
+ 
+ 	/*
+ 	 * AVX512VBMI instructions:
+-	 * - Intel: ecx[bit 1] in structured feature info.
++	 * - Intel: ecx[bit 1] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vbmi = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000002));
++	isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002));
+ 
+ 	/*
+ 	 * AVX512VBMI2 instructions:
+-	 * - Intel: ecx[bit 6] in structured feature info.
++	 * - Intel: ecx[bit 6] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000040));
++	isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040));
+ 
+ 	/*
+ 	 * AVX512BITALG instructions:
+-	 * - Intel: ecx[bit 12] in structured feature info.
++	 * - Intel: ecx[bit 12] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512bitalg = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00001000));
++	isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000));
+ 
+ 	/*
+ 	 * AVX512VPOPCNTDQ instructions:
+-	 * - Intel: ecx[bit 14] in structured feature info.
++	 * - Intel: ecx[bit 14] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00004000));
++	isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000));
+ 
+ 	/*
+ 	 * AVX512VNNI instructions:
+-	 * - Intel: ecx[bit 11] in structured feature info.
++	 * - Intel: ecx[bit 11] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vnni = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000800));
++	isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800));
+ 
+ 	/*
+ 	 * AVX512_4VNNIW instructions:
+-	 * - Intel: edx[bit 2] in structured feature info.
++	 * - Intel: edx[bit 2] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000004));
++	isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004));
+ 
+ 	/*
+ 	 * AVX512_4FMAPS instructions:
+-	 * - Intel: edx[bit 3] in structured feature info.
++	 * - Intel: edx[bit 3] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000008));
++	isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008));
++
++	/*
++	 * AVX512_VP2INTERSECT instructions:
++	 * - Intel: edx[bit 8] in structured feature info (ecx = 0).
++	 */
++	isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100));
++
++	/*
++	 * AVX512_BF16 instructions:
++	 * - Intel: eax[bit 5] in structured feature info (ecx = 1).
++	 */
++	isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
+ 
+ 	/*
+ 	 * HLE instructions:
+-	 * - Intel: ebx[bit 4] in structured feature info.
++	 * - Intel: ebx[bit 4] in structured feature info (ecx = 0).
+ 	 */
+-	isa.hle = !!(structured_feature_info.ebx & UINT32_C(0x00000010));
++	isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010));
+ 
+ 	/*
+ 	 * RTM instructions:
+-	 * - Intel: ebx[bit 11] in structured feature info.
++	 * - Intel: ebx[bit 11] in structured feature info (ecx = 0).
+ 	 */
+-	isa.rtm = !!(structured_feature_info.ebx & UINT32_C(0x00000800));
++	isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800));
+ 
+ 	/*
+ 	 * XTEST instruction:
+@@ -500,9 +514,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * MPX registers and instructions:
+-	 * - Intel: ebx[bit 14] in structured feature info.
++	 * - Intel: ebx[bit 14] in structured feature info (ecx = 0).
+ 	 */
+-	isa.mpx = mpx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00004000));
++	isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000));
+ 
+ #if CPUINFO_ARCH_X86
+ 	/*
+@@ -528,9 +542,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * CLWB instruction:
+-	 * - Intel: ebx[bit 24] in structured feature info.
++	 * - Intel: ebx[bit 24] in structured feature info (ecx = 0).
+ 	 */
+-	isa.clwb = !!(structured_feature_info.ebx & UINT32_C(0x01000000));
++	isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000));
+ 
+ 	/*
+ 	 * MOVBE instruction:
+@@ -549,9 +563,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions.
+-	 * - Intel: ebx[bit 0] in structured feature info.
++	 * - Intel: ebx[bit 0] in structured feature info (ecx = 0).
+ 	 */
+-	isa.fs_gs_base = !!(structured_feature_info.ebx & UINT32_C(0x00000001));
++	isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001));
+ 
+ 	/*
+ 	 * LZCNT instruction:
+@@ -573,21 +587,21 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * BMI instructions:
+-	 * - Intel, AMD: ebx[bit 3] in structured feature info.
++	 * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0).
+ 	 */
+-	isa.bmi = !!(structured_feature_info.ebx & UINT32_C(0x00000008));
++	isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008));
+ 
+ 	/*
+ 	 * BMI2 instructions:
+-	 * - Intel: ebx[bit 8] in structured feature info.
++	 * - Intel: ebx[bit 8] in structured feature info (ecx = 0).
+ 	 */
+-	isa.bmi2 = !!(structured_feature_info.ebx & UINT32_C(0x00000100));
++	isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100));
+ 
+ 	/*
+ 	 * ADCX/ADOX instructions:
+-	 * - Intel: ebx[bit 19] in structured feature info.
++	 * - Intel: ebx[bit 19] in structured feature info (ecx = 0).
+ 	 */
+-	isa.adx = !!(structured_feature_info.ebx & UINT32_C(0x00080000));
++	isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000));
+ 
+ 	/*
+ 	 * AES instructions:
+@@ -597,9 +611,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * VAES instructions:
+-	 * - Intel: ecx[bit 9] in structured feature info.
++	 * - Intel: ecx[bit 9] in structured feature info (ecx = 0).
+ 	 */
+-	isa.vaes = !!(structured_feature_info.ecx & UINT32_C(0x00000200));
++	isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200));
+ 
+ 	/*
+ 	 * PCLMULQDQ instruction:
+@@ -609,15 +623,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * VPCLMULQDQ instruction:
+-	 * - Intel: ecx[bit 10] in structured feature info.
++	 * - Intel: ecx[bit 10] in structured feature info (ecx = 0).
+ 	 */
+-	isa.vpclmulqdq = !!(structured_feature_info.ecx & UINT32_C(0x00000400));
++	isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400));
+ 
+ 	/*
+ 	 * GFNI instructions:
+-	 * - Intel: ecx[bit 8] in structured feature info.
++	 * - Intel: ecx[bit 8] in structured feature info (ecx = 0).
+ 	 */
+-	isa.gfni = !!(structured_feature_info.ecx & UINT32_C(0x00000100));
++	isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100));
+ 
+ 	/*
+ 	 * RDRAND instruction:
+@@ -627,15 +641,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * RDSEED instruction:
+-	 * - Intel: ebx[bit 18] in structured feature info.
++	 * - Intel: ebx[bit 18] in structured feature info (ecx = 0).
+ 	 */
+-	isa.rdseed = !!(structured_feature_info.ebx & UINT32_C(0x00040000));
++	isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000));
+ 
+ 	/*
+ 	 * SHA instructions:
+-	 * - Intel: ebx[bit 29] in structured feature info.
++	 * - Intel: ebx[bit 29] in structured feature info (ecx = 0).
+ 	 */
+-	isa.sha = !!(structured_feature_info.ebx & UINT32_C(0x20000000));
++	isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000));
+ 
+ 	if (vendor == cpuinfo_vendor_via) {
+ 		const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000));
+@@ -700,9 +714,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * RDPID instruction:
+-	 * - Intel: ecx[bit 22] in structured feature info.
++	 * - Intel: ecx[bit 22] in structured feature info (ecx = 0).
+ 	 */
+-	isa.rdpid = !!(structured_feature_info.ecx & UINT32_C(0x00400000));
++	isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000));
+ 
+ 	return isa;
+ }
+diff --git src/x86/linux/init.c src/x86/linux/init.c
+index b5f74d0..c096336 100644
+--- src/x86/linux/init.c
++++ src/x86/linux/init.c
+@@ -592,6 +592,8 @@ void cpuinfo_x86_linux_init(void) {
+ 	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
+ 	cpuinfo_cache_count[cpuinfo_cache_level_4]  = l4_count;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/x86/mach/init.c src/x86/mach/init.c
+index 7b41ad0..ae2be33 100644
+--- src/x86/mach/init.c
++++ src/x86/mach/init.c
+@@ -327,6 +327,8 @@ void cpuinfo_x86_mach_init(void) {
+ 	cpuinfo_clusters_count = mach_topology.packages;
+ 	cpuinfo_packages_count = mach_topology.packages;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/x86/uarch.c src/x86/uarch.c
+index 71c899e..ba72d8a 100644
+--- src/x86/uarch.c
++++ src/x86/uarch.c
+@@ -74,13 +74,19 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+ 						case 0x4F: // Broadwell-E
+ 						case 0x56: // Broadwell-DE
+ 							return cpuinfo_uarch_broadwell;
+-						case 0x4E: // Skylake-U/Y
+-						case 0x55: // Skylake Server (SKX)
+-						case 0x5E: // Skylake-H/S
++						case 0x4E: // Sky Lake Client Y/U
++						case 0x55: // Sky/Cascade/Cooper Lake Server
++						case 0x5E: // Sky Lake Client DT/H/S
++						case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
++						case 0x9E: // Kaby/Coffee Lake DT/H/S
+ 							return cpuinfo_uarch_sky_lake;
+-						case 0x8E: // Kaby Lake U/Y
+-						case 0x9E: // Kaby Lake H/S
+-							return cpuinfo_uarch_kaby_lake;
++						case 0x66: // Cannon Lake (Core i3-8121U)
++							return cpuinfo_uarch_palm_cove;
++						case 0x6A: // Ice Lake-DE
++						case 0x6C: // Ice Lake-SP
++						case 0x7D: // Ice Lake-Y
++						case 0x7E: // Ice Lake-U
++							return cpuinfo_uarch_sunny_cove;
+ 
+ 						/* Low-power cores */
+ 						case 0x1C: // Diamondville, Silverthorne, Pineview
+@@ -90,18 +96,20 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+ 						case 0x35: // Cloverview
+ 						case 0x36: // Cedarview, Centerton 
+ 							return cpuinfo_uarch_saltwell;
+-						case 0x37:
+-						case 0x4A:
+-						case 0x4D:
++						case 0x37: // Bay Trail
++						case 0x4A: // Merrifield
++						case 0x4D: // Avoton, Rangeley
+ 						case 0x5A: // Moorefield
+ 						case 0x5D: // SoFIA
+ 							return cpuinfo_uarch_silvermont;
+-						case 0x4C: // Braswell
+-						case 0x5F: // Denverton
++						case 0x4C: // Braswell, Cherry Trail
+ 						case 0x75: // Spreadtrum SC9853I-IA
+-						case 0x7A: // Goldmont+
+ 							return cpuinfo_uarch_airmont;
+-
++						case 0x5C: // Apollo Lake
++						case 0x5F: // Denverton
++							return cpuinfo_uarch_goldmont;
++						case 0x7A: // Gemini Lake
++							return cpuinfo_uarch_goldmont_plus;
+ 						/* Knights-series cores */
+ 						case 0x57:
+ 							return cpuinfo_uarch_knights_landing;
+@@ -190,7 +198,15 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+ 						return cpuinfo_uarch_jaguar;
+ 					}
+ 				case 0x17:
+-					return cpuinfo_uarch_zen;
++					switch (model_info->model) {
++						case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
++						case 0x08: // 12 nm Pinnacle Ridge
++						case 0x11: // 14 nm Raven Ridge
++						case 0x18: // 12 nm Picasso
++							return cpuinfo_uarch_zen;
++						case 0x71: // Matisse
++							return cpuinfo_uarch_zen2;
++					}
+ 			}
+ 			break;
+ 		default:
+diff --git src/x86/windows/init.c src/x86/windows/init.c
+index eb3498a..7a2090e 100644
+--- src/x86/windows/init.c
++++ src/x86/windows/init.c
+@@ -571,6 +571,8 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ 	cpuinfo_clusters_count = packages_count;
+ 	cpuinfo_packages_count = packages_count;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	MemoryBarrier();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git tools/cache-info.c tools/cache-info.c
+index ba0706f..05f69ee 100644
+--- tools/cache-info.c
++++ tools/cache-info.c
+@@ -60,6 +60,8 @@ int main(int argc, char** argv) {
+ 		fprintf(stderr, "failed to initialize CPU information\n");
+ 		exit(EXIT_FAILURE);
+ 	}
++	printf("Max cache size (upper bound): %"PRIu32" bytes\n", cpuinfo_get_max_cache_size());
++
+ 	if (cpuinfo_get_l1i_caches_count() != 0 && (cpuinfo_get_l1i_cache(0)->flags & CPUINFO_CACHE_UNIFIED) == 0) {
+ 		report_cache(cpuinfo_get_l1i_caches_count(), cpuinfo_get_l1i_cache(0), 1, "instruction");
+ 	}
+diff --git tools/cpu-info.c tools/cpu-info.c
+index caef424..7fa5187 100644
+--- tools/cpu-info.c
++++ tools/cpu-info.c
+@@ -73,8 +73,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Broadwell";
+ 		case cpuinfo_uarch_sky_lake:
+ 			return "Sky Lake";
+-		case cpuinfo_uarch_kaby_lake:
+-			return "Kaby Lake";
++		case cpuinfo_uarch_palm_cove:
++			return "Palm Cove";
++		case cpuinfo_uarch_sunny_cove:
++			return "Sunny Cove";
+ 		case cpuinfo_uarch_willamette:
+ 			return "Willamette";
+ 		case cpuinfo_uarch_prescott:
+@@ -87,6 +89,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Silvermont";
+ 		case cpuinfo_uarch_airmont:
+ 			return "Airmont";
++		case cpuinfo_uarch_goldmont:
++			return "Goldmont";
++		case cpuinfo_uarch_goldmont_plus:
++			return "Goldmont Plus";
+ 		case cpuinfo_uarch_knights_ferry:
+ 			return "Knights Ferry";
+ 		case cpuinfo_uarch_knights_corner:
+@@ -117,6 +123,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Excavator";
+ 		case cpuinfo_uarch_zen:
+ 			return "Zen";
++		case cpuinfo_uarch_zen2:
++			return "Zen 2";
+ 		case cpuinfo_uarch_geode:
+ 			return "Geode";
+ 		case cpuinfo_uarch_bobcat:
+@@ -157,6 +165,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Cortex-A55";
+ 		case cpuinfo_uarch_cortex_a57:
+ 			return "Cortex-A57";
++		case cpuinfo_uarch_cortex_a65:
++			return "Cortex-A65";
+ 		case cpuinfo_uarch_cortex_a72:
+ 			return "Cortex-A72";
+ 		case cpuinfo_uarch_cortex_a73:
+@@ -165,6 +175,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Cortex-A75";
+ 		case cpuinfo_uarch_cortex_a76:
+ 			return "Cortex-A76";
++		case cpuinfo_uarch_cortex_a76ae:
++			return "Cortex-A76AE";
++		case cpuinfo_uarch_cortex_a77:
++			return "Cortex-A77";
+ 		case cpuinfo_uarch_scorpion:
+ 			return "Scorpion";
+ 		case cpuinfo_uarch_krait:
+@@ -181,12 +195,16 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Denver 2";
+ 		case cpuinfo_uarch_carmel:
+ 			return "Carmel";
+-		case cpuinfo_uarch_mongoose_m1:
+-			return "Mongoose M1";
+-		case cpuinfo_uarch_mongoose_m2:
+-			return "Mongoose M2";
+-		case cpuinfo_uarch_meerkat_m3:
+-			return "Meerkat M3";
++		case cpuinfo_uarch_exynos_m1:
++			return "Exynos M1";
++		case cpuinfo_uarch_exynos_m2:
++			return "Exynos M2";
++		case cpuinfo_uarch_exynos_m3:
++			return "Exynos M3";
++		case cpuinfo_uarch_exynos_m4:
++			return "Exynos M4";
++		case cpuinfo_uarch_exynos_m5:
++			return "Exynos M5";
+ 		case cpuinfo_uarch_swift:
+ 			return "Swift";
+ 		case cpuinfo_uarch_cyclone:
+@@ -258,13 +276,23 @@ int main(int argc, char** argv) {
+ 			printf(", %s %s\n", vendor_string, uarch_string);
+ 		}
+ 	}
+-	printf("Logical processors:\n");
++	printf("Logical processors");
++  #if defined(__linux__)
++    printf(" (System ID)");
++  #endif
++  printf(":\n");
+ 	for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
+ 		const struct cpuinfo_processor* processor = cpuinfo_get_processor(i);
++    printf("\t%"PRIu32"", i);
++
++    #if defined(__linux__)
++      printf(" (%"PRId32")", processor->linux_id);
++    #endif
++
+ 		#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+-			printf("\t%"PRIu32": APIC ID 0x%08"PRIx32"\n", i, processor->apic_id);
++			printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id);
+ 		#else
+-			printf("\t%"PRIu32"\n", i);
++			printf("\n");
+ 		#endif
+ 	}
+ }
+diff --git tools/isa-info.c tools/isa-info.c
+index 594c46a..98ef919 100644
+--- tools/isa-info.c
++++ tools/isa-info.c
+@@ -67,6 +67,8 @@ int main(int argc, char** argv) {
+ 		printf("\tAVX512BITALG: %s\n", cpuinfo_has_x86_avx512bitalg() ? "yes" : "no");
+ 		printf("\tAVX512VPOPCNTDQ: %s\n", cpuinfo_has_x86_avx512vpopcntdq() ? "yes" : "no");
+ 		printf("\tAVX512VNNI: %s\n", cpuinfo_has_x86_avx512vnni() ? "yes" : "no");
++		printf("\tAVX512BF16: %s\n", cpuinfo_has_x86_avx512bf16() ? "yes" : "no");
++		printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
+ 		printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
+ 		printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");
+ 
diff --git a/src/init.c b/src/init.c
index bee5fd4..aa31aeb 100644
--- a/src/init.c
+++ b/src/init.c
@@ -393,8 +393,8 @@
           break;
         case cpuinfo_uarch_cortex_a75:
         case cpuinfo_uarch_cortex_a76:
-        case cpuinfo_uarch_meerkat_m3:
-        case (cpuinfo_uarch_meerkat_m3 + 1):
+        case cpuinfo_uarch_exynos_m3:
+        case cpuinfo_uarch_exynos_m4:
           xnn_params.f32.gemm = (struct gemm_parameters) {
             .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
             .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8__aarch64_neonfma_cortex_a75,
@@ -405,8 +405,8 @@
           };
           break;
 
-        case cpuinfo_uarch_mongoose_m1:
-        case cpuinfo_uarch_mongoose_m2:
+        case cpuinfo_uarch_exynos_m1:
+        case cpuinfo_uarch_exynos_m2:
           xnn_params.f32.gemm = (struct gemm_parameters) {
             .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_6x8s4__neonfma,
             .igemm = (xnn_igemm_ukernel_function) xnn_f32_igemm_ukernel_6x8s4__neonfma,
diff --git a/third_party/cpuinfo.BUILD b/third_party/cpuinfo.BUILD
index ccec4c2..ad8a070 100644
--- a/third_party/cpuinfo.BUILD
+++ b/third_party/cpuinfo.BUILD
@@ -17,6 +17,7 @@
 COMMON_SRCS = [
     "src/api.c",
     "src/init.c",
+    "src/cache.c",
 ]
 
 # Architecture-specific sources and headers.
diff --git a/third_party/cpuinfo.patch b/third_party/cpuinfo.patch
new file mode 100644
index 0000000..ca3caa9
--- /dev/null
+++ b/third_party/cpuinfo.patch
@@ -0,0 +1,1475 @@
+diff --git CMakeLists.txt CMakeLists.txt
+index e594def..cab4d05 100644
+--- CMakeLists.txt
++++ CMakeLists.txt
+@@ -119,7 +119,8 @@ ENDIF()
+ # ---[ cpuinfo library
+ SET(CPUINFO_SRCS
+   src/init.c
+-  src/api.c)
++  src/api.c
++  src/cache.c)
+ 
+ IF(CPUINFO_SUPPORTED_PLATFORM)
+   IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")
+diff --git LICENSE LICENSE
+index 4910bfe..3f9a4f0 100644
+--- LICENSE
++++ LICENSE
+@@ -1,3 +1,4 @@
++Copyright (c) 2019 Google LLC
+ Copyright (c) 2017-2018 Facebook Inc.
+ Copyright (C) 2012-2017 Georgia Institute of Technology
+ Copyright (C) 2010-2012 Marat Dukhan
+diff --git include/cpuinfo.h include/cpuinfo.h
+index 7d5833f..9938d2b 100644
+--- include/cpuinfo.h
++++ include/cpuinfo.h
+@@ -38,10 +38,18 @@
+ 	#define CPUINFO_ARCH_PNACL 1
+ #endif
+ 
+-#if defined(EMSCRIPTEN)
++#if defined(__asmjs__)
+ 	#define CPUINFO_ARCH_ASMJS 1
+ #endif
+ 
++#if defined(__wasm__)
++	#if defined(__wasm_simd128__)
++		#define CPUINFO_ARCH_WASMSIMD 1
++	#else
++		#define CPUINFO_ARCH_WASM 1
++	#endif
++#endif
++
+ #if CPUINFO_ARCH_X86 && defined(_MSC_VER)
+ 	#define CPUINFO_ABI __cdecl
+ #elif CPUINFO_ARCH_X86 && defined(__GNUC__)
+@@ -80,6 +88,14 @@
+ 	#define CPUINFO_ARCH_ASMJS 0
+ #endif
+ 
++#ifndef CPUINFO_ARCH_WASM
++	#define CPUINFO_ARCH_WASM 0
++#endif
++
++#ifndef CPUINFO_ARCH_WASMSIMD
++	#define CPUINFO_ARCH_WASMSIMD 0
++#endif
++
+ #define CPUINFO_CACHE_UNIFIED          0x00000001
+ #define CPUINFO_CACHE_INCLUSIVE        0x00000002
+ #define CPUINFO_CACHE_COMPLEX_INDEXING 0x00000004
+@@ -278,10 +294,14 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_haswell      = 0x00100208,
+ 	/** Intel Broadwell microarchitecture. */
+ 	cpuinfo_uarch_broadwell    = 0x00100209,
+-	/** Intel Sky Lake microarchitecture. */
++	/** Intel Sky Lake microarchitecture (14 nm, including Kaby/Coffee/Whiskey/Amber/Comet/Cascade/Cooper Lake). */
+ 	cpuinfo_uarch_sky_lake     = 0x0010020A,
+-	/** Intel Kaby Lake microarchitecture. */
+-	cpuinfo_uarch_kaby_lake    = 0x0010020B,
++	/** DEPRECATED (Intel Kaby Lake microarchitecture). */
++	cpuinfo_uarch_kaby_lake    = 0x0010020A,
++	/** Intel Palm Cove microarchitecture (10 nm, Cannon Lake). */
++	cpuinfo_uarch_palm_cove    = 0x0010020B,
++	/** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */
++	cpuinfo_uarch_sunny_cove   = 0x0010020C,
+ 
+ 	/** Pentium 4 with Willamette, Northwood, or Foster cores. */
+ 	cpuinfo_uarch_willamette = 0x00100300,
+@@ -289,13 +309,17 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_prescott   = 0x00100301,
+ 
+ 	/** Intel Atom on 45 nm process. */
+-	cpuinfo_uarch_bonnell    = 0x00100400,
++	cpuinfo_uarch_bonnell       = 0x00100400,
+ 	/** Intel Atom on 32 nm process. */
+-	cpuinfo_uarch_saltwell   = 0x00100401,
++	cpuinfo_uarch_saltwell      = 0x00100401,
+ 	/** Intel Silvermont microarchitecture (22 nm out-of-order Atom). */
+-	cpuinfo_uarch_silvermont = 0x00100402,
++	cpuinfo_uarch_silvermont    = 0x00100402,
+ 	/** Intel Airmont microarchitecture (14 nm out-of-order Atom). */
+-	cpuinfo_uarch_airmont    = 0x00100403,
++	cpuinfo_uarch_airmont       = 0x00100403,
++	/** Intel Goldmont microarchitecture (Denverton, Apollo Lake). */
++	cpuinfo_uarch_goldmont      = 0x00100404,
++	/** Intel Goldmont Plus microarchitecture (Gemini Lake). */
++	cpuinfo_uarch_goldmont_plus = 0x00100405,
+ 
+ 	/** Intel Knights Ferry HPC boards. */
+ 	cpuinfo_uarch_knights_ferry   = 0x00100500,
+@@ -335,8 +359,10 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_steamroller = 0x00200107,
+ 	/** AMD Excavator microarchitecture (Carizzo APUs). */
+ 	cpuinfo_uarch_excavator   = 0x00200108,
+-	/** AMD Zen microarchitecture (Ryzen CPUs). */
++	/** AMD Zen microarchitecture (12/14 nm Ryzen and EPYC CPUs). */
+ 	cpuinfo_uarch_zen         = 0x00200109,
++	/** AMD Zen 2 microarchitecture (7 nm Ryzen and EPYC CPUs). */
++	cpuinfo_uarch_zen2        = 0x0020010A,
+ 
+ 	/** NSC Geode and AMD Geode GX and LX. */
+ 	cpuinfo_uarch_geode  = 0x00200200,
+@@ -370,23 +396,34 @@ enum cpuinfo_uarch {
+ 	cpuinfo_uarch_cortex_a17 = 0x00300217,
+ 
+ 	/** ARM Cortex-A32. */
+-	cpuinfo_uarch_cortex_a32 = 0x00300332,
++	cpuinfo_uarch_cortex_a32   = 0x00300332,
+ 	/** ARM Cortex-A35. */
+-	cpuinfo_uarch_cortex_a35 = 0x00300335,
++	cpuinfo_uarch_cortex_a35   = 0x00300335,
+ 	/** ARM Cortex-A53. */
+-	cpuinfo_uarch_cortex_a53 = 0x00300353,
++	cpuinfo_uarch_cortex_a53   = 0x00300353,
+ 	/** ARM Cortex-A55. */
+-	cpuinfo_uarch_cortex_a55 = 0x00300355,
++	cpuinfo_uarch_cortex_a55   = 0x00300355,
+ 	/** ARM Cortex-A57. */
+-	cpuinfo_uarch_cortex_a57 = 0x00300357,
++	cpuinfo_uarch_cortex_a57   = 0x00300357,
++	/** ARM Cortex-A65. */
++	cpuinfo_uarch_cortex_a65   = 0x00300365,
+ 	/** ARM Cortex-A72. */
+-	cpuinfo_uarch_cortex_a72 = 0x00300372,
++	cpuinfo_uarch_cortex_a72   = 0x00300372,
+ 	/** ARM Cortex-A73. */
+-	cpuinfo_uarch_cortex_a73 = 0x00300373,
++	cpuinfo_uarch_cortex_a73   = 0x00300373,
+ 	/** ARM Cortex-A75. */
+-	cpuinfo_uarch_cortex_a75 = 0x00300375,
++	cpuinfo_uarch_cortex_a75   = 0x00300375,
+ 	/** ARM Cortex-A76. */
+-	cpuinfo_uarch_cortex_a76 = 0x00300376,
++	cpuinfo_uarch_cortex_a76   = 0x00300376,
++	/** ARM Cortex-A76AE. */
++	cpuinfo_uarch_cortex_a76ae = 0x00300378,
++	/** ARM Cortex-A77. */
++	cpuinfo_uarch_cortex_a77   = 0x00300377,
++
++	/** ARM Neoverse N1. */
++	cpuinfo_uarch_neoverse_n1  = 0x00300400,
++	/** ARM Neoverse E1. */
++	cpuinfo_uarch_neoverse_e1  = 0x00300401,
+ 
+ 	/** Qualcomm Scorpion. */
+ 	cpuinfo_uarch_scorpion = 0x00400100,
+@@ -406,12 +443,22 @@ enum cpuinfo_uarch {
+ 	/** Nvidia Carmel. */
+ 	cpuinfo_uarch_carmel   = 0x00500102,
+ 
+-	/** Samsung Mongoose M1 (Exynos 8890 big cores). */
++	/** Samsung Exynos M1 (Exynos 8890 big cores). */
++	cpuinfo_uarch_exynos_m1 = 0x00600100,
++	/** Samsung Exynos M2 (Exynos 8895 big cores). */
++	cpuinfo_uarch_exynos_m2 = 0x00600101,
++	/** Samsung Exynos M3 (Exynos 9810 big cores). */
++	cpuinfo_uarch_exynos_m3  = 0x00600102,
++	/** Samsung Exynos M4 (Exynos 9820 big cores). */
++	cpuinfo_uarch_exynos_m4  = 0x00600103,
++	/** Samsung Exynos M5 (Exynos 9830 big cores). */
++	cpuinfo_uarch_exynos_m5  = 0x00600104,
++
++	/* Old names for Exynos. */
+ 	cpuinfo_uarch_mongoose_m1 = 0x00600100,
+-	/** Samsung Mongoose M2 (Exynos 8895 big cores). */
+ 	cpuinfo_uarch_mongoose_m2 = 0x00600101,
+-	/** Samsung Meerkat M3 (Exynos 9810 big cores). */
+ 	cpuinfo_uarch_meerkat_m3  = 0x00600102,
++	cpuinfo_uarch_meerkat_m4  = 0x00600103,
+ 
+ 	/** Apple A6 and A6X processors. */
+ 	cpuinfo_uarch_swift     = 0x00700100,
+@@ -640,6 +687,8 @@ void CPUINFO_ABI cpuinfo_deinitialize(void);
+ 		bool avx512bitalg;
+ 		bool avx512vpopcntdq;
+ 		bool avx512vnni;
++		bool avx512bf16;
++		bool avx512vp2intersect;
+ 		bool avx512_4vnniw;
+ 		bool avx512_4fmaps;
+ 		bool hle;
+@@ -1110,6 +1159,22 @@ static inline bool cpuinfo_has_x86_avx512vnni(void) {
+ 	#endif
+ }
+ 
++static inline bool cpuinfo_has_x86_avx512bf16(void) {
++	#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
++		return cpuinfo_isa.avx512bf16;
++	#else
++		return false;
++	#endif
++}
++
++static inline bool cpuinfo_has_x86_avx512vp2intersect(void) {
++	#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
++		return cpuinfo_isa.avx512vp2intersect;
++	#else
++		return false;
++	#endif
++}
++
+ static inline bool cpuinfo_has_x86_avx512_4vnniw(void) {
+ 	#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+ 		return cpuinfo_isa.avx512_4vnniw;
+@@ -1682,6 +1747,11 @@ uint32_t CPUINFO_ABI cpuinfo_get_l2_caches_count(void);
+ uint32_t CPUINFO_ABI cpuinfo_get_l3_caches_count(void);
+ uint32_t CPUINFO_ABI cpuinfo_get_l4_caches_count(void);
+ 
++/**
++ * Returns upper bound on cache size.
++ */
++uint32_t CPUINFO_ABI cpuinfo_get_max_cache_size(void);
++
+ const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_current_processor(void);
+ const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_current_core(void);
+ 
+diff --git src/api.c src/api.c
+index 98b5805..83744f5 100644
+--- src/api.c
++++ src/api.c
+@@ -18,6 +18,7 @@ uint32_t cpuinfo_cores_count = 0;
+ uint32_t cpuinfo_clusters_count = 0;
+ uint32_t cpuinfo_packages_count = 0;
+ uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max] = { 0 };
++uint32_t cpuinfo_max_cache_size = 0;
+ 
+ 
+ const struct cpuinfo_processor* cpuinfo_get_processors(void) {
+diff --git src/arm/api.h src/arm/api.h
+index 11e588b..69274bc 100644
+--- src/arm/api.h
++++ src/arm/api.h
+@@ -104,6 +104,9 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+ 	struct cpuinfo_cache l1d[restrict static 1],
+ 	struct cpuinfo_cache l2[restrict static 1],
+ 	struct cpuinfo_cache l3[restrict static 1]);
++
++CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size(
++	const struct cpuinfo_processor processor[restrict static 1]);
+ #else /* defined(__cplusplus) */
+ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
+ 	enum cpuinfo_uarch uarch,
+diff --git src/arm/cache.c src/arm/cache.c
+index 5ada7d9..ccadeb4 100644
+--- src/arm/cache.c
++++ src/arm/cache.c
+@@ -1,10 +1,12 @@
+ #include <stdint.h>
+ 
+ #include <cpuinfo.h>
++#include <cpuinfo/internal-api.h>
+ #include <cpuinfo/log.h>
+ #include <arm/api.h>
+ #include <arm/midr.h>
+ 
++
+ void cpuinfo_arm_decode_cache(
+ 	enum cpuinfo_uarch uarch,
+ 	uint32_t cluster_cores,
+@@ -109,7 +111,7 @@ void cpuinfo_arm_decode_cache(
+ 			 *      memory accesses and has been optimized for use with the Cortex-A5 processor.
+ 			 * 8.1.7. Exclusive L2 cache
+ 			 *    The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
+-			 *    This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. 
++			 *    This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
+ 			 *
+ 			 *  +--------------------+-----------+-----------+----------+-----------+
+ 			 *  | Processor model    | L1D cache | L1I cache | L2 cache | Reference |
+@@ -698,7 +700,7 @@ void cpuinfo_arm_decode_cache(
+ 			 * [3] https://en.wikichip.org/wiki/hisilicon/kirin/980
+ 			 */
+ 			if (midr_is_qualcomm_cortex_a55_silver(midr)) {
+-				/* Qualcomm-modified Cortex-A55 in Snapdragon 710 / 845 */
++				/* Qualcomm-modified Cortex-A55 in Snapdragon 670 / 710 / 845 */
+ 				uint32_t l3_size = 1024 * 1024;
+ 				switch (chipset->series) {
+ 					case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
+@@ -827,6 +829,62 @@ void cpuinfo_arm_decode_cache(
+ 				.flags = CPUINFO_CACHE_INCLUSIVE
+ 			};
+ 			break;
++		case cpuinfo_uarch_cortex_a65:
++		{
++			/*
++			 * ARM Cortex‑A65 Core Technical Reference Manual
++			 * A6.1. About the L1 memory system
++			 *   The L1 memory system enhances the performance and power efficiency in the Cortex‑A65 core.
++			 *   It consists of separate instruction and data caches. You can configure instruction and data caches
++			 *   independently during implementation to sizes of 32KB or 64KB.
++			 *
++			 *   L1 instruction-side memory system
++			 *   The L1 instruction-side memory system provides an instruction stream to the DPU. Its key features are:
++			 *    - 64-byte instruction side cache line length.
++			 *    - 4-way set associative L1 instruction cache.
++			 *
++			 *   L1 data-side memory system
++			 *    - 64-byte data side cache line length.
++			 *    - 4-way set associative L1 data cache.
++			 *
++			 * A7.1 About the L2 memory system
++			 *   The Cortex‑A65 L2 memory system is required to interface the Cortex‑A65 cores to the L3 memory system.
++			 *   The L2 memory subsystem consists of:
++			 *    - An optional 4-way, set-associative L2 cache with a configurable size of 64KB, 128KB, or 256KB.
++			 *      Cache lines have a fixed length of 64 bytes.
++			 *
++			 *   The main features of the L2 memory system are:
++			 *    - Strictly exclusive with L1 data cache.
++			 *    - Pseudo-inclusive with L1 instruction cache.
++			 *    - Private per-core unified L2 cache.
++			 */
++			const uint32_t l1_size = 32 * 1024;
++			const uint32_t l2_size = 128 * 1024;
++			const uint32_t l3_size = 512 * 1024;
++			*l1i = (struct cpuinfo_cache) {
++				.size = l1_size,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l1d = (struct cpuinfo_cache) {
++				.size = l1_size,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l2 = (struct cpuinfo_cache) {
++				.size = l2_size,
++				.associativity = 4,
++				.line_size = 64,
++				.flags = CPUINFO_CACHE_INCLUSIVE
++			};
++			*l3 = (struct cpuinfo_cache) {
++				.size = l3_size,
++				/* DynamIQ */
++				.associativity = 16,
++				.line_size = 64,
++			};
++			break;
++		}
+ 		case cpuinfo_uarch_cortex_a72:
+ 		{
+ 			/*
+@@ -1047,6 +1105,7 @@ void cpuinfo_arm_decode_cache(
+ 			break;
+ 		}
+ 		case cpuinfo_uarch_cortex_a76:
++		case cpuinfo_uarch_cortex_a76ae:
+ 		{
+ 			/*
+ 			 * ARM Cortex-A76 Core Technical Reference Manual
+@@ -1119,6 +1178,57 @@ void cpuinfo_arm_decode_cache(
+ 			};
+ 			break;
+ 		}
++		case cpuinfo_uarch_cortex_a77:
++		{
++			/*
++			 * ARM Cortex-A77 Core Technical Reference Manual
++			 * A6.1. About the L1 memory system
++			 *   The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
++			 *
++			 * A6.1.1 L1 instruction-side memory system
++			 *   The L1 instruction memory system has the following key features:
++			 *    - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
++			 *      Physically Tagged (PIPT) 4-way set-associative L1 data cache.
++			 *    - Fixed cache line length of 64 bytes.
++			 *
++			 * A6.1.2 L1 data-side memory system
++			 *   The L1 data memory system has the following features:
++			 *    - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
++			 *      Physically Tagged (PIPT) 4-way set-associative L1 data cache.
++			 *    - Fixed cache line length of 64 bytes.
++			 *    - Pseudo-LRU cache replacement policy.
++			 *
++			 * A7.1 About the L2 memory system
++			 *   The L2 memory subsystem consist of:
++			 *    - An 8-way set associative L2 cache with a configurable size of 128KB, 256KB or 512KB. Cache lines
++			 *      have a fixed length of 64 bytes.
++			 *    - Strictly inclusive with L1 data cache. Weakly inclusive with L1 instruction cache.
++			 */
++			const uint32_t l2_size = 256 * 1024;
++			const uint32_t l3_size = 1024 * 1024;
++			*l1i = (struct cpuinfo_cache) {
++				.size = 64 * 1024,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l1d = (struct cpuinfo_cache) {
++				.size = 64 * 1024,
++				.associativity = 4,
++				.line_size = 64,
++			};
++			*l2 = (struct cpuinfo_cache) {
++				.size = l2_size,
++				.associativity = 8,
++				.line_size = 64,
++				.flags = CPUINFO_CACHE_INCLUSIVE,
++			};
++			*l3 = (struct cpuinfo_cache) {
++				.size = l3_size,
++				.associativity = 16,
++				.line_size = 64,
++			};
++			break;
++		}
+ #if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
+ 		case cpuinfo_uarch_scorpion:
+ 			/*
+@@ -1248,8 +1358,8 @@ void cpuinfo_arm_decode_cache(
+ 				.line_size = 64
+ 			};
+ 			break;
+-		case cpuinfo_uarch_mongoose_m1:
+-		case cpuinfo_uarch_mongoose_m2:
++		case cpuinfo_uarch_exynos_m1:
++		case cpuinfo_uarch_exynos_m2:
+ 			/*
+ 			 * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
+ 			 *    namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
+@@ -1283,7 +1393,7 @@ void cpuinfo_arm_decode_cache(
+ 				.line_size = 64
+ 			};
+ 			break;
+-		case cpuinfo_uarch_meerkat_m3:
++		case cpuinfo_uarch_exynos_m3:
+ 			/*
+ 			 *  +--------------------+-------+-----------+-----------+-----------+----------+------------+
+ 			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | L3 cache | Reference  |
+@@ -1294,19 +1404,19 @@ void cpuinfo_arm_decode_cache(
+ 			 * [1] https://www.anandtech.com/show/12478/exynos-9810-handson-awkward-first-results
+ 			 */
+ 			*l1i = (struct cpuinfo_cache) {
+-				.size = 64 * 1024 /* assume same as in Mongoose cores */,
+-				.associativity = 4 /* assume same as in Mongoose cores */,
+-				.line_size = 128 /* assume same as in Mongoose cores */
++				.size = 64 * 1024 /* assume same as in Exynos M1/M2 cores */,
++				.associativity = 4 /* assume same as in Exynos M1/M2 cores */,
++				.line_size = 128 /* assume same as in Exynos M1/M2 cores */
+ 			};
+ 			*l1d = (struct cpuinfo_cache) {
+ 				.size = 64 * 1024,
+-				.associativity = 8 /* assume same as in Mongoose cores */,
+-				.line_size = 64 /* assume same as in Mongoose cores */,
++				.associativity = 8 /* assume same as in Exynos M1/M2 cores */,
++				.line_size = 64 /* assume same as in Exynos M1/M2 cores */,
+ 			};
+ 			*l2 = (struct cpuinfo_cache) {
+ 				.size = 512 * 1024,
+-				.associativity = 16 /* assume same as in Mongoose cores */,
+-				.line_size = 64 /* assume same as in Mongoose cores */,
++				.associativity = 16 /* assume same as in Exynos M1/M2 cores */,
++				.line_size = 64 /* assume same as in Exynos M1/M2 cores */,
+ 			};
+ 			*l3 = (struct cpuinfo_cache) {
+ 				.size = 4 * 1024 * 1024,
+@@ -1393,3 +1503,124 @@ void cpuinfo_arm_decode_cache(
+ 		}
+ 	}
+ }
++
++uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* processor) {
++	/*
++	 * There is no precise way to detect cache size on ARM/ARM64, and cache size reported by cpuinfo
++	 * may underestimate the actual cache size. Thus, we use microarchitecture-specific maximum.
++	 */
++	switch (processor->core->uarch) {
++		case cpuinfo_uarch_xscale:
++		case cpuinfo_uarch_arm11:
++		case cpuinfo_uarch_scorpion:
++		case cpuinfo_uarch_krait:
++		case cpuinfo_uarch_kryo:
++		case cpuinfo_uarch_exynos_m1:
++		case cpuinfo_uarch_exynos_m2:
++		case cpuinfo_uarch_exynos_m3:
++			/* cpuinfo-detected cache size always correct */
++			return cpuinfo_compute_max_cache_size(processor);
++		case cpuinfo_uarch_cortex_a5:
++			/* Max observed (NXP Vybrid SoC) */
++			return 512 * 1024;
++		case cpuinfo_uarch_cortex_a7:
++			/*
++			 * Cortex-A7 MPCore Technical Reference Manual:
++			 * 7.1. About the L2 Memory system
++			 *   The L2 memory system consists of an:
++			 *    - Optional tightly-coupled L2 cache that includes:
++			 *      - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
++			 */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a8:
++			/*
++			 * Cortex-A8 Technical Reference Manual:
++			 * 8.1. About the L2 memory system
++			 *   The key features of the L2 memory system include:
++			 *    - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
++			 */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a9:
++			/* Max observed (e.g. Exynos 4212) */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a12:
++		case cpuinfo_uarch_cortex_a17:
++			/*
++			 * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
++			 * 7.1. About the L2 Memory system
++			 *   The key features of the L2 memory system include:
++			 *    - An integrated L2 cache:
++			 *      - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
++			 */
++			return 8 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a15:
++			/*
++			 * ARM Cortex-A15 MPCore Processor Technical Reference Manual:
++			 * 7.1. About the L2 memory system
++			 *   The features of the L2 memory system include:
++			 *    - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
++			 */
++			return 4 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a35:
++			/*
++			 * ARM Cortex‑A35 Processor Technical Reference Manual:
++			 * 7.1 About the L2 memory system
++			 *   L2 cache
++			 *    - Further features of the L2 cache are:
++			 *      - Configurable size of 128KB, 256KB, 512KB, and 1MB.
++			 */
++			return 1024 * 1024;
++		case cpuinfo_uarch_cortex_a53:
++			/*
++			 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
++			 * 7.1. About the L2 memory system
++			 *   The L2 memory system consists of an:
++			 *    - Optional tightly-coupled L2 cache that includes:
++			 *      - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
++			 */
++			return 2 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a57:
++			/*
++			 * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
++			 * 7.1 About the L2 memory system
++			 *   The features of the L2 memory system include:
++			 *    - Configurable L2 cache size of 512KB, 1MB, and 2MB.
++			 */
++			return 2 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a72:
++			/*
++			 * ARM Cortex-A72 MPCore Processor Technical Reference Manual:
++			 * 7.1 About the L2 memory system
++			 *   The features of the L2 memory system include:
++			 *    - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
++			 */
++			return 4 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a73:
++			/*
++			 * ARM Cortex‑A73 MPCore Processor Technical Reference Manual
++			 * 7.1 About the L2 memory system
++			 *   The L2 memory system consists of:
++			 *    - A tightly-integrated L2 cache with:
++			 *       - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
++			 */
++			return 8 * 1024 * 1024;
++		case cpuinfo_uarch_cortex_a55:
++		case cpuinfo_uarch_cortex_a75:
++		case cpuinfo_uarch_cortex_a76:
++		case cpuinfo_uarch_exynos_m4:
++		default:
++			/*
++			 * ARM DynamIQ Shared Unit Technical Reference Manual
++			 * 1.3 Implementation options
++			 *   L3_CACHE_SIZE
++			 *    - 256KB
++			 *    - 512KB
++			 *    - 1024KB
++			 *    - 1536KB
++			 *    - 2048KB
++			 *    - 3072KB
++			 *    - 4096KB
++			 */
++			return 4 * 1024 * 1024;
++	}
++}
+diff --git src/arm/linux/init.c src/arm/linux/init.c
+index a297f63..f0c432c 100644
+--- src/arm/linux/init.c
++++ src/arm/linux/init.c
+@@ -678,6 +678,8 @@ void cpuinfo_arm_linux_init(void) {
+ 	cpuinfo_cache_count[cpuinfo_cache_level_2]  = l2_count;
+ 	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
+ 
++	cpuinfo_max_cache_size = cpuinfo_arm_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/arm/linux/midr.c src/arm/linux/midr.c
+index 668fc72..2c3116b 100644
+--- src/arm/linux/midr.c
++++ src/arm/linux/midr.c
+@@ -220,7 +220,7 @@ static const struct cluster_config cluster_configs[] = {
+ 		.model = UINT16_C(7420),
+ 		.clusters = 2,
+ 		.cluster_cores = {
+-			[0] = 4, 
++			[0] = 4,
+ 			[1] = 4,
+ 		},
+ 		.cluster_midr = {
+@@ -229,7 +229,7 @@ static const struct cluster_config cluster_configs[] = {
+ 		},
+ 	},
+ 	{
+-		/* Exynos 8890: 4x Mongoose + 4x Cortex-A53 */
++		/* Exynos 8890: 4x Exynos M1 + 4x Cortex-A53 */
+ 		.cores = 8,
+ 		.series = cpuinfo_arm_chipset_series_samsung_exynos,
+ 		.model = UINT16_C(8890),
+@@ -695,7 +695,7 @@ static void cpuinfo_arm_linux_detect_cluster_midr_by_sequential_scan(
+ 		if (bitmask_all(processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
+ 			if (processors[i].package_leader_id == i) {
+ 				if (bitmask_all(processors[i].flags, CPUINFO_ARM_LINUX_VALID_MIDR)) {
+-					midr = processors[i].midr;	
++					midr = processors[i].midr;
+ 				} else {
+ 					cpuinfo_log_info("assume processor %"PRIu32" to have MIDR %08"PRIx32, i, midr);
+ 					/* To be consistent, we copy the MIDR entirely, rather than by parts */
+@@ -836,7 +836,7 @@ uint32_t cpuinfo_arm_linux_detect_cluster_midr(
+ 			 *    - Clusters preceeding the first reported MIDR value are assumed to have the last reported MIDR value.
+ 			 *    - Clusters following any reported MIDR value to have that MIDR value.
+ 			 */
+-			
++
+ 			if (cpuinfo_arm_linux_detect_cluster_midr_by_chipset(
+ 				chipset, clusters_count, cluster_leaders, usable_processors, processors, true))
+ 			{
+diff --git src/arm/mach/init.c src/arm/mach/init.c
+index 5b14b49..e64cc18 100644
+--- src/arm/mach/init.c
++++ src/arm/mach/init.c
+@@ -562,6 +562,8 @@ void cpuinfo_arm_mach_init(void) {
+ 	cpuinfo_clusters_count = num_clusters;
+ 	cpuinfo_packages_count = mach_topology.packages;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/arm/midr.h src/arm/midr.h
+index 6363ed7..d5a28e3 100644
+--- src/arm/midr.h
++++ src/arm/midr.h
+@@ -33,31 +33,31 @@
+ #define CPUINFO_ARM_MIDR_KRYO_SILVER_821 UINT32_C(0x510F2010)
+ #define CPUINFO_ARM_MIDR_KRYO_GOLD       UINT32_C(0x510F2050)
+ #define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110)
+-#define CPUINFO_ARM_MIDR_MONGOOSE        UINT32_C(0x530F0010)
++#define CPUINFO_ARM_MIDR_EXYNOS_M1_M2    UINT32_C(0x530F0010)
+ #define CPUINFO_ARM_MIDR_DENVER2         UINT32_C(0x4E0F0030)
+ 
+ inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) {
+-	return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) |
+ 		((implementer << CPUINFO_ARM_MIDR_IMPLEMENTER_OFFSET) & CPUINFO_ARM_MIDR_IMPLEMENTER_MASK);
+ }
+ 
+ inline static uint32_t midr_set_variant(uint32_t midr, uint32_t variant) {
+-	return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_VARIANT_MASK) |
+ 		((variant << CPUINFO_ARM_MIDR_VARIANT_OFFSET) & CPUINFO_ARM_MIDR_VARIANT_MASK);
+ }
+ 
+ inline static uint32_t midr_set_architecture(uint32_t midr, uint32_t architecture) {
+-	return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_ARCHITECTURE_MASK) |
+ 		((architecture << CPUINFO_ARM_MIDR_ARCHITECTURE_OFFSET) & CPUINFO_ARM_MIDR_ARCHITECTURE_MASK);
+ }
+ 
+ inline static uint32_t midr_set_part(uint32_t midr, uint32_t part) {
+-	return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_PART_MASK) |
+ 		((part << CPUINFO_ARM_MIDR_PART_OFFSET) & CPUINFO_ARM_MIDR_PART_MASK);
+ }
+ 
+ inline static uint32_t midr_set_revision(uint32_t midr, uint32_t revision) {
+-	return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) | 
++	return (midr & ~CPUINFO_ARM_MIDR_REVISION_MASK) |
+ 		((revision << CPUINFO_ARM_MIDR_REVISION_OFFSET) & CPUINFO_ARM_MIDR_REVISION_MASK);
+ }
+ 
+@@ -171,13 +171,20 @@ inline static bool midr_is_kryo_gold(uint32_t midr) {
+ inline static uint32_t midr_score_core(uint32_t midr) {
+ 	const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+ 	switch (midr & core_mask) {
++		case UINT32_C(0x53000040): /* Exynos M5 */
++		case UINT32_C(0x53000030): /* Exynos M4 */
++			/* These cores are in big role w.r.t. Cortex-A75 or Cortex-A76 */
++			return 6;
+ 		case UINT32_C(0x4E000030): /* Denver 2 */
+-		case UINT32_C(0x53000010): /* Mongoose */
+-		case UINT32_C(0x53000020): /* Meerkat */
++		case UINT32_C(0x53000010): /* Exynos M1 and Exynos M2 */
++		case UINT32_C(0x53000020): /* Exynos M3 */
++		case UINT32_C(0x51008040): /* Kryo 485 Gold / Gold Prime */
+ 		case UINT32_C(0x51008020): /* Kryo 385 Gold */
+ 		case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
+ 		case UINT32_C(0x51002050): /* Kryo Gold */
+ 		case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
++		case UINT32_C(0x4100D0D0): /* Cortex-A77 */
++		case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ 		case UINT32_C(0x4100D0B0): /* Cortex-A76 */
+ 		case UINT32_C(0x4100D0A0): /* Cortex-A75 */
+ 		case UINT32_C(0x4100D090): /* Cortex-A73 */
+@@ -191,12 +198,14 @@ inline static uint32_t midr_score_core(uint32_t midr) {
+ 		case UINT32_C(0x4100D070): /* Cortex-A57 */
+ 			/* Cortex-A57 can be in LITTLE role w.r.t. Denver 2, or in big role w.r.t. Cortex-A53 */
+ 			return 4;
++		case UINT32_C(0x4100D060): /* Cortex-A65 */
+ 		case UINT32_C(0x4100D050): /* Cortex-A55 */
+ 		case UINT32_C(0x4100D030): /* Cortex-A53 */
+ 			/* Cortex-A53 is usually in LITTLE role, but can be in big role w.r.t. Cortex-A35 */
+ 			return 2;
+ 		case UINT32_C(0x4100D040): /* Cortex-A35 */
+ 		case UINT32_C(0x4100C070): /* Cortex-A7 */
++		case UINT32_C(0x51008050): /* Kryo 485 Silver */
+ 		case UINT32_C(0x51008030): /* Kryo 385 Silver */
+ 		case UINT32_C(0x51008010): /* Kryo 260 / 280 Silver */
+ 		case UINT32_C(0x51002110): /* Kryo Silver (Snapdragon 820) */
+@@ -215,7 +224,7 @@ inline static uint32_t midr_score_core(uint32_t midr) {
+ }
+ 
+ inline static uint32_t midr_little_core_for_big(uint32_t midr) {
+-	const uint32_t core_mask = 
++	const uint32_t core_mask =
+ 		CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_ARCHITECTURE_MASK | CPUINFO_ARM_MIDR_PART_MASK;
+ 	switch (midr & core_mask) {
+ 		case CPUINFO_ARM_MIDR_CORTEX_A75:
+@@ -223,7 +232,7 @@ inline static uint32_t midr_little_core_for_big(uint32_t midr) {
+ 		case CPUINFO_ARM_MIDR_CORTEX_A73:
+ 		case CPUINFO_ARM_MIDR_CORTEX_A72:
+ 		case CPUINFO_ARM_MIDR_CORTEX_A57:
+-		case CPUINFO_ARM_MIDR_MONGOOSE:
++		case CPUINFO_ARM_MIDR_EXYNOS_M1_M2:
+ 			return CPUINFO_ARM_MIDR_CORTEX_A53;
+ 		case CPUINFO_ARM_MIDR_CORTEX_A17:
+ 		case CPUINFO_ARM_MIDR_CORTEX_A15:
+diff --git src/arm/uarch.c src/arm/uarch.c
+index d7d2c63..a38250a 100644
+--- src/arm/uarch.c
++++ src/arm/uarch.c
+@@ -60,6 +60,9 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 				case 0xD05:
+ 					*uarch = cpuinfo_uarch_cortex_a55;
+ 					break;
++				case 0xD06:
++					*uarch = cpuinfo_uarch_cortex_a65;
++					break;
+ 				case 0xD07:
+ 					*uarch = cpuinfo_uarch_cortex_a57;
+ 					break;
+@@ -75,6 +78,22 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 				case 0xD0B:
+ 					*uarch = cpuinfo_uarch_cortex_a76;
+ 					break;
++#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
++				case 0xD0C:
++					*uarch = cpuinfo_uarch_neoverse_n1;
++					break;
++#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
++				case 0xD0D:
++					*uarch = cpuinfo_uarch_cortex_a77;
++					break;
++				case 0xD0E:
++					*uarch = cpuinfo_uarch_cortex_a76ae;
++					break;
++#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
++				case 0xD4A:
++					*uarch = cpuinfo_uarch_neoverse_e1;
++					break;
++#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
+ 				default:
+ 					switch (midr_get_part(midr) >> 8) {
+ #if CPUINFO_ARCH_ARM
+@@ -242,10 +261,14 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 					*vendor = cpuinfo_vendor_arm;
+ 					*uarch = cpuinfo_uarch_cortex_a55;
+ 					break;
+-				case 0x804:
++				case 0x804: /* High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76 */
+ 					*vendor = cpuinfo_vendor_arm;
+ 					*uarch = cpuinfo_uarch_cortex_a76;
+ 					break;
++				case 0x805: /* Low-performance Kryo 485 "Silver" -> Cortex-A55 */
++					*vendor = cpuinfo_vendor_arm;
++					*uarch = cpuinfo_uarch_cortex_a55;
++					break;
+ #if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ 				case 0xC00:
+ 					*uarch = cpuinfo_uarch_falkor;
+@@ -263,27 +286,43 @@ void cpuinfo_arm_decode_vendor_uarch(
+ 			switch (midr & (CPUINFO_ARM_MIDR_VARIANT_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
+ 				case 0x00100010:
+ 					/*
+-					 * Exynos 8890 MIDR = 0x531F0011, assume Mongoose M1 has:
++					 * Exynos 8890 MIDR = 0x531F0011, assume Exynos M1 has:
+ 					 * - CPU variant 0x1
+ 					 * - CPU part 0x001
+ 					 */
+-					*uarch = cpuinfo_uarch_mongoose_m1;
++					*uarch = cpuinfo_uarch_exynos_m1;
+ 					break;
+ 				case 0x00400010:
+ 					/*
+-					 * Exynos 8895 MIDR = 0x534F0010, assume Mongoose M2 has:
++					 * Exynos 8895 MIDR = 0x534F0010, assume Exynos M2 has:
+ 					 * - CPU variant 0x4
+ 					 * - CPU part 0x001
+ 					 */
+-					*uarch = cpuinfo_uarch_mongoose_m2;
++					*uarch = cpuinfo_uarch_exynos_m2;
+ 					break;
+ 				case 0x00100020:
+ 					/*
+-					 * Exynos 9810 MIDR = 0x531F0020, assume Meerkat M3 has:
++					 * Exynos 9810 MIDR = 0x531F0020, assume Exynos M3 has:
+ 					 * - CPU variant 0x1
+ 					 * - CPU part 0x002
+ 					 */
+-					*uarch = cpuinfo_uarch_meerkat_m3;
++					*uarch = cpuinfo_uarch_exynos_m3;
++					break;
++				case 0x00100030:
++					/*
++					 * Exynos 9820 MIDR = 0x531F0030, assume Exynos M4 has:
++					 * - CPU variant 0x1
++					 * - CPU part 0x003
++					 */
++					*uarch = cpuinfo_uarch_exynos_m4;
++					break;
++				case 0x00100040:
++					/*
++					 * Exynos 990 MIDR = 0x531F0040, assume Exynos M5 has:
++					 * - CPU variant 0x1
++					 * - CPU part 0x004
++					 */
++					*uarch = cpuinfo_uarch_exynos_m5;
+ 					break;
+ 				default:
+ 					cpuinfo_log_warning("unknown Samsung CPU variant 0x%01"PRIx32" part 0x%03"PRIx32" ignored",
+diff --git src/cache.c src/cache.c
+new file mode 100644
+index 0000000..b976b87
+--- /dev/null
++++ src/cache.c
+@@ -0,0 +1,18 @@
++#include <stddef.h>
++
++#include <cpuinfo.h>
++#include <cpuinfo/internal-api.h>
++
++
++uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor) {  /* Size of the highest cache level (L4 > L3 > L2 > L1D) attached to processor; 0 if none. L1I is not consulted. */
++  if (processor->cache.l4 != NULL) {
++    return processor->cache.l4->size;
++  } else if (processor->cache.l3 != NULL) {
++    return processor->cache.l3->size;
++  } else if (processor->cache.l2 != NULL) {
++    return processor->cache.l2->size;
++  } else if (processor->cache.l1d != NULL) {
++    return processor->cache.l1d->size;
++  }
++  return 0;  /* None of the L4, L3, L2, L1D pointers is set for this processor. */
++}
+diff --git src/cpuinfo/internal-api.h src/cpuinfo/internal-api.h
+index 6045750..717b810 100644
+--- src/cpuinfo/internal-api.h
++++ src/cpuinfo/internal-api.h
+@@ -31,6 +31,7 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_cores_count;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_clusters_count;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_packages_count;
+ extern CPUINFO_INTERNAL uint32_t cpuinfo_cache_count[cpuinfo_cache_level_max];
++extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
+ 
+ CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
+ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
+@@ -40,4 +41,6 @@ CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
+ CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
+ CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
+ 
++CPUINFO_PRIVATE uint32_t cpuinfo_compute_max_cache_size(const struct cpuinfo_processor* processor);
++
+ typedef void (*cpuinfo_processor_callback)(uint32_t);
+diff --git src/x86/isa.c src/x86/isa.c
+index bca1ecd..d27dbca 100644
+--- src/x86/isa.c
++++ src/x86/isa.c
+@@ -42,8 +42,10 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ {
+ 	struct cpuinfo_x86_isa isa = { 0 };
+ 
+-	const struct cpuid_regs structured_feature_info =
++	const struct cpuid_regs structured_feature_info0 =
+ 		(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs) { 0, 0, 0, 0};
++	const struct cpuid_regs structured_feature_info1 =
++		(max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs) { 0, 0, 0, 0};
+ 
+ 	const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
+ 	const struct cpuid_regs processor_capacity_info =
+@@ -144,9 +146,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * CLFLUSHOPT instruction:
+-	 * - Intel: ebx[bit 23] in structured feature info.
++	 * - Intel: ebx[bit 23] in structured feature info (ecx = 0).
+ 	 */
+-	isa.clflushopt = !!(structured_feature_info.ebx & UINT32_C(0x00800000));
++	isa.clflushopt = !!(structured_feature_info0.ebx & UINT32_C(0x00800000));
+ 
+ 	/*
+ 	 * MWAIT/MONITOR instructions:
+@@ -273,9 +275,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * PREFETCHWT1 instruction:
+-	 * - Intel: ecx[bit 0] of structured feature info. Reserved bit on AMD.
++	 * - Intel: ecx[bit 0] of structured feature info (ecx = 0). Reserved bit on AMD.
+ 	 */
+-	isa.prefetchwt1 = !!(structured_feature_info.ecx & UINT32_C(0x00000001));
++	isa.prefetchwt1 = !!(structured_feature_info0.ecx & UINT32_C(0x00000001));
+ 
+ #if CPUINFO_ARCH_X86
+ 	/*
+@@ -386,111 +388,123 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * AVX2 instructions:
+-	 * - Intel: ebx[bit 5] in structured feature info.
++	 * - Intel: ebx[bit 5] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx2 = avx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00000020));
++	isa.avx2 = avx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00000020));
+ 
+ 	/*
+ 	 * AVX512F instructions:
+-	 * - Intel: ebx[bit 16] in structured feature info.
++	 * - Intel: ebx[bit 16] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512f = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00010000));
++	isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
+ 
+ 	/*
+ 	 * AVX512PF instructions:
+-	 * - Intel: ebx[bit 26] in structured feature info.
++	 * - Intel: ebx[bit 26] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512pf = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x04000000));
++	isa.avx512pf = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x04000000));
+ 
+ 	/*
+ 	 * AVX512ER instructions:
+-	 * - Intel: ebx[bit 27] in structured feature info.
++	 * - Intel: ebx[bit 27] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512er = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x08000000));
++	isa.avx512er = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x08000000));
+ 
+ 	/*
+ 	 * AVX512CD instructions:
+-	 * - Intel: ebx[bit 28] in structured feature info.
++	 * - Intel: ebx[bit 28] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512cd = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x10000000));
++	isa.avx512cd = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x10000000));
+ 
+ 	/*
+ 	 * AVX512DQ instructions:
+-	 * - Intel: ebx[bit 17] in structured feature info.
++	 * - Intel: ebx[bit 17] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512dq = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00020000));
++	isa.avx512dq = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00020000));
+ 
+ 	/*
+ 	 * AVX512BW instructions:
+-	 * - Intel: ebx[bit 30] in structured feature info.
++	 * - Intel: ebx[bit 30] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512bw = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x40000000));
++	isa.avx512bw = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x40000000));
+ 
+ 	/*
+ 	 * AVX512VL instructions:
+-	 * - Intel: ebx[bit 31] in structured feature info.
++	 * - Intel: ebx[bit 31] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vl = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x80000000));
++	isa.avx512vl = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x80000000));
+ 
+ 	/*
+ 	 * AVX512IFMA instructions:
+-	 * - Intel: ebx[bit 21] in structured feature info.
++	 * - Intel: ebx[bit 21] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512ifma = avx512_regs && !!(structured_feature_info.ebx & UINT32_C(0x00200000));
++	isa.avx512ifma = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00200000));
+ 
+ 	/*
+ 	 * AVX512VBMI instructions:
+-	 * - Intel: ecx[bit 1] in structured feature info.
++	 * - Intel: ecx[bit 1] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vbmi = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000002));
++	isa.avx512vbmi = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000002));
+ 
+ 	/*
+ 	 * AVX512VBMI2 instructions:
+-	 * - Intel: ecx[bit 6] in structured feature info.
++	 * - Intel: ecx[bit 6] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000040));
++	isa.avx512vbmi2 = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000040));
+ 
+ 	/*
+ 	 * AVX512BITALG instructions:
+-	 * - Intel: ecx[bit 12] in structured feature info.
++	 * - Intel: ecx[bit 12] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512bitalg = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00001000));
++	isa.avx512bitalg = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00001000));
+ 
+ 	/*
+ 	 * AVX512VPOPCNTDQ instructions:
+-	 * - Intel: ecx[bit 14] in structured feature info.
++	 * - Intel: ecx[bit 14] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00004000));
++	isa.avx512vpopcntdq = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00004000));
+ 
+ 	/*
+ 	 * AVX512VNNI instructions:
+-	 * - Intel: ecx[bit 11] in structured feature info.
++	 * - Intel: ecx[bit 11] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512vnni = avx512_regs && !!(structured_feature_info.ecx & UINT32_C(0x00000800));
++	isa.avx512vnni = avx512_regs && !!(structured_feature_info0.ecx & UINT32_C(0x00000800));
+ 
+ 	/*
+ 	 * AVX512_4VNNIW instructions:
+-	 * - Intel: edx[bit 2] in structured feature info.
++	 * - Intel: edx[bit 2] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000004));
++	isa.avx512_4vnniw = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000004));
+ 
+ 	/*
+ 	 * AVX512_4FMAPS instructions:
+-	 * - Intel: edx[bit 3] in structured feature info.
++	 * - Intel: edx[bit 3] in structured feature info (ecx = 0).
+ 	 */
+-	isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info.edx & UINT32_C(0x00000008));
++	isa.avx512_4fmaps = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000008));
++
++	/*
++	 * AVX512_VP2INTERSECT instructions:
++	 * - Intel: edx[bit 8] in structured feature info (ecx = 0).
++	 */
++	isa.avx512vp2intersect = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00000100));
++
++	/*
++	 * AVX512_BF16 instructions:
++	 * - Intel: eax[bit 5] in structured feature info (ecx = 1).
++	 */
++	isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
+ 
+ 	/*
+ 	 * HLE instructions:
+-	 * - Intel: ebx[bit 4] in structured feature info.
++	 * - Intel: ebx[bit 4] in structured feature info (ecx = 0).
+ 	 */
+-	isa.hle = !!(structured_feature_info.ebx & UINT32_C(0x00000010));
++	isa.hle = !!(structured_feature_info0.ebx & UINT32_C(0x00000010));
+ 
+ 	/*
+ 	 * RTM instructions:
+-	 * - Intel: ebx[bit 11] in structured feature info.
++	 * - Intel: ebx[bit 11] in structured feature info (ecx = 0).
+ 	 */
+-	isa.rtm = !!(structured_feature_info.ebx & UINT32_C(0x00000800));
++	isa.rtm = !!(structured_feature_info0.ebx & UINT32_C(0x00000800));
+ 
+ 	/*
+ 	 * XTEST instruction:
+@@ -500,9 +514,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * MPX registers and instructions:
+-	 * - Intel: ebx[bit 14] in structured feature info.
++	 * - Intel: ebx[bit 14] in structured feature info (ecx = 0).
+ 	 */
+-	isa.mpx = mpx_regs && !!(structured_feature_info.ebx & UINT32_C(0x00004000));
++	isa.mpx = mpx_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00004000));
+ 
+ #if CPUINFO_ARCH_X86
+ 	/*
+@@ -528,9 +542,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * CLWB instruction:
+-	 * - Intel: ebx[bit 24] in structured feature info.
++	 * - Intel: ebx[bit 24] in structured feature info (ecx = 0).
+ 	 */
+-	isa.clwb = !!(structured_feature_info.ebx & UINT32_C(0x01000000));
++	isa.clwb = !!(structured_feature_info0.ebx & UINT32_C(0x01000000));
+ 
+ 	/*
+ 	 * MOVBE instruction:
+@@ -549,9 +563,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE instructions.
+-	 * - Intel: ebx[bit 0] in structured feature info.
++	 * - Intel: ebx[bit 0] in structured feature info (ecx = 0).
+ 	 */
+-	isa.fs_gs_base = !!(structured_feature_info.ebx & UINT32_C(0x00000001));
++	isa.fs_gs_base = !!(structured_feature_info0.ebx & UINT32_C(0x00000001));
+ 
+ 	/*
+ 	 * LZCNT instruction:
+@@ -573,21 +587,21 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * BMI instructions:
+-	 * - Intel, AMD: ebx[bit 3] in structured feature info.
++	 * - Intel, AMD: ebx[bit 3] in structured feature info (ecx = 0).
+ 	 */
+-	isa.bmi = !!(structured_feature_info.ebx & UINT32_C(0x00000008));
++	isa.bmi = !!(structured_feature_info0.ebx & UINT32_C(0x00000008));
+ 
+ 	/*
+ 	 * BMI2 instructions:
+-	 * - Intel: ebx[bit 8] in structured feature info.
++	 * - Intel: ebx[bit 8] in structured feature info (ecx = 0).
+ 	 */
+-	isa.bmi2 = !!(structured_feature_info.ebx & UINT32_C(0x00000100));
++	isa.bmi2 = !!(structured_feature_info0.ebx & UINT32_C(0x00000100));
+ 
+ 	/*
+ 	 * ADCX/ADOX instructions:
+-	 * - Intel: ebx[bit 19] in structured feature info.
++	 * - Intel: ebx[bit 19] in structured feature info (ecx = 0).
+ 	 */
+-	isa.adx = !!(structured_feature_info.ebx & UINT32_C(0x00080000));
++	isa.adx = !!(structured_feature_info0.ebx & UINT32_C(0x00080000));
+ 
+ 	/*
+ 	 * AES instructions:
+@@ -597,9 +611,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * VAES instructions:
+-	 * - Intel: ecx[bit 9] in structured feature info.
++	 * - Intel: ecx[bit 9] in structured feature info (ecx = 0).
+ 	 */
+-	isa.vaes = !!(structured_feature_info.ecx & UINT32_C(0x00000200));
++	isa.vaes = !!(structured_feature_info0.ecx & UINT32_C(0x00000200));
+ 
+ 	/*
+ 	 * PCLMULQDQ instruction:
+@@ -609,15 +623,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * VPCLMULQDQ instruction:
+-	 * - Intel: ecx[bit 10] in structured feature info.
++	 * - Intel: ecx[bit 10] in structured feature info (ecx = 0).
+ 	 */
+-	isa.vpclmulqdq = !!(structured_feature_info.ecx & UINT32_C(0x00000400));
++	isa.vpclmulqdq = !!(structured_feature_info0.ecx & UINT32_C(0x00000400));
+ 
+ 	/*
+ 	 * GFNI instructions:
+-	 * - Intel: ecx[bit 8] in structured feature info.
++	 * - Intel: ecx[bit 8] in structured feature info (ecx = 0).
+ 	 */
+-	isa.gfni = !!(structured_feature_info.ecx & UINT32_C(0x00000100));
++	isa.gfni = !!(structured_feature_info0.ecx & UINT32_C(0x00000100));
+ 
+ 	/*
+ 	 * RDRAND instruction:
+@@ -627,15 +641,15 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * RDSEED instruction:
+-	 * - Intel: ebx[bit 18] in structured feature info.
++	 * - Intel: ebx[bit 18] in structured feature info (ecx = 0).
+ 	 */
+-	isa.rdseed = !!(structured_feature_info.ebx & UINT32_C(0x00040000));
++	isa.rdseed = !!(structured_feature_info0.ebx & UINT32_C(0x00040000));
+ 
+ 	/*
+ 	 * SHA instructions:
+-	 * - Intel: ebx[bit 29] in structured feature info.
++	 * - Intel: ebx[bit 29] in structured feature info (ecx = 0).
+ 	 */
+-	isa.sha = !!(structured_feature_info.ebx & UINT32_C(0x20000000));
++	isa.sha = !!(structured_feature_info0.ebx & UINT32_C(0x20000000));
+ 
+ 	if (vendor == cpuinfo_vendor_via) {
+ 		const struct cpuid_regs padlock_meta_info = cpuid(UINT32_C(0xC0000000));
+@@ -700,9 +714,9 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
+ 
+ 	/*
+ 	 * RDPID instruction:
+-	 * - Intel: ecx[bit 22] in structured feature info.
++	 * - Intel: ecx[bit 22] in structured feature info (ecx = 0).
+ 	 */
+-	isa.rdpid = !!(structured_feature_info.ecx & UINT32_C(0x00400000));
++	isa.rdpid = !!(structured_feature_info0.ecx & UINT32_C(0x00400000));
+ 
+ 	return isa;
+ }
+diff --git src/x86/linux/init.c src/x86/linux/init.c
+index b5f74d0..c096336 100644
+--- src/x86/linux/init.c
++++ src/x86/linux/init.c
+@@ -592,6 +592,8 @@ void cpuinfo_x86_linux_init(void) {
+ 	cpuinfo_cache_count[cpuinfo_cache_level_3]  = l3_count;
+ 	cpuinfo_cache_count[cpuinfo_cache_level_4]  = l4_count;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/x86/mach/init.c src/x86/mach/init.c
+index 7b41ad0..ae2be33 100644
+--- src/x86/mach/init.c
++++ src/x86/mach/init.c
+@@ -327,6 +327,8 @@ void cpuinfo_x86_mach_init(void) {
+ 	cpuinfo_clusters_count = mach_topology.packages;
+ 	cpuinfo_packages_count = mach_topology.packages;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	__sync_synchronize();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git src/x86/uarch.c src/x86/uarch.c
+index 71c899e..ba72d8a 100644
+--- src/x86/uarch.c
++++ src/x86/uarch.c
+@@ -74,13 +74,19 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+ 						case 0x4F: // Broadwell-E
+ 						case 0x56: // Broadwell-DE
+ 							return cpuinfo_uarch_broadwell;
+-						case 0x4E: // Skylake-U/Y
+-						case 0x55: // Skylake Server (SKX)
+-						case 0x5E: // Skylake-H/S
++						case 0x4E: // Sky Lake Client Y/U
++						case 0x55: // Sky/Cascade/Cooper Lake Server
++						case 0x5E: // Sky Lake Client DT/H/S
++						case 0x8E: // Kaby/Whiskey/Amber/Comet Lake Y/U
++						case 0x9E: // Kaby/Coffee Lake DT/H/S
+ 							return cpuinfo_uarch_sky_lake;
+-						case 0x8E: // Kaby Lake U/Y
+-						case 0x9E: // Kaby Lake H/S
+-							return cpuinfo_uarch_kaby_lake;
++						case 0x66: // Cannon Lake (Core i3-8121U)
++							return cpuinfo_uarch_palm_cove;
++						case 0x6A: // Ice Lake-DE
++						case 0x6C: // Ice Lake-SP
++						case 0x7D: // Ice Lake-Y
++						case 0x7E: // Ice Lake-U
++							return cpuinfo_uarch_sunny_cove;
+ 
+ 						/* Low-power cores */
+ 						case 0x1C: // Diamondville, Silverthorne, Pineview
+@@ -90,18 +96,20 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+ 						case 0x35: // Cloverview
+ 						case 0x36: // Cedarview, Centerton 
+ 							return cpuinfo_uarch_saltwell;
+-						case 0x37:
+-						case 0x4A:
+-						case 0x4D:
++						case 0x37: // Bay Trail
++						case 0x4A: // Merrifield
++						case 0x4D: // Avoton, Rangeley
+ 						case 0x5A: // Moorefield
+ 						case 0x5D: // SoFIA
+ 							return cpuinfo_uarch_silvermont;
+-						case 0x4C: // Braswell
+-						case 0x5F: // Denverton
++						case 0x4C: // Braswell, Cherry Trail
+ 						case 0x75: // Spreadtrum SC9853I-IA
+-						case 0x7A: // Goldmont+
+ 							return cpuinfo_uarch_airmont;
+-
++						case 0x5C: // Apollo Lake
++						case 0x5F: // Denverton
++							return cpuinfo_uarch_goldmont;
++						case 0x7A: // Gemini Lake
++							return cpuinfo_uarch_goldmont_plus;
+ 						/* Knights-series cores */
+ 						case 0x57:
+ 							return cpuinfo_uarch_knights_landing;
+@@ -190,7 +198,15 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch(
+ 						return cpuinfo_uarch_jaguar;
+ 					}
+ 				case 0x17:
+-					return cpuinfo_uarch_zen;
++					switch (model_info->model) {
++						case 0x01: // 14 nm Naples, Whitehaven, Summit Ridge, Snowy Owl
++						case 0x08: // 12 nm Pinnacle Ridge
++						case 0x11: // 14 nm Raven Ridge
++						case 0x18: // 12 nm Picasso
++							return cpuinfo_uarch_zen;
++						case 0x71: // Matisse
++							return cpuinfo_uarch_zen2;
++					}
+ 			}
+ 			break;
+ 		default:
+diff --git src/x86/windows/init.c src/x86/windows/init.c
+index eb3498a..7a2090e 100644
+--- src/x86/windows/init.c
++++ src/x86/windows/init.c
+@@ -571,6 +571,8 @@ BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
+ 	cpuinfo_clusters_count = packages_count;
+ 	cpuinfo_packages_count = packages_count;
+ 
++	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
++
+ 	MemoryBarrier();
+ 
+ 	cpuinfo_is_initialized = true;
+diff --git tools/cache-info.c tools/cache-info.c
+index ba0706f..05f69ee 100644
+--- tools/cache-info.c
++++ tools/cache-info.c
+@@ -60,6 +60,8 @@ int main(int argc, char** argv) {
+ 		fprintf(stderr, "failed to initialize CPU information\n");
+ 		exit(EXIT_FAILURE);
+ 	}
++	printf("Max cache size (upper bound): %"PRIu32" bytes\n", cpuinfo_get_max_cache_size());
++
+ 	if (cpuinfo_get_l1i_caches_count() != 0 && (cpuinfo_get_l1i_cache(0)->flags & CPUINFO_CACHE_UNIFIED) == 0) {
+ 		report_cache(cpuinfo_get_l1i_caches_count(), cpuinfo_get_l1i_cache(0), 1, "instruction");
+ 	}
+diff --git tools/cpu-info.c tools/cpu-info.c
+index caef424..7fa5187 100644
+--- tools/cpu-info.c
++++ tools/cpu-info.c
+@@ -73,8 +73,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Broadwell";
+ 		case cpuinfo_uarch_sky_lake:
+ 			return "Sky Lake";
+-		case cpuinfo_uarch_kaby_lake:
+-			return "Kaby Lake";
++		case cpuinfo_uarch_palm_cove:
++			return "Palm Cove";
++		case cpuinfo_uarch_sunny_cove:
++			return "Sunny Cove";
+ 		case cpuinfo_uarch_willamette:
+ 			return "Willamette";
+ 		case cpuinfo_uarch_prescott:
+@@ -87,6 +89,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Silvermont";
+ 		case cpuinfo_uarch_airmont:
+ 			return "Airmont";
++		case cpuinfo_uarch_goldmont:
++			return "Goldmont";
++		case cpuinfo_uarch_goldmont_plus:
++			return "Goldmont Plus";
+ 		case cpuinfo_uarch_knights_ferry:
+ 			return "Knights Ferry";
+ 		case cpuinfo_uarch_knights_corner:
+@@ -117,6 +123,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Excavator";
+ 		case cpuinfo_uarch_zen:
+ 			return "Zen";
++		case cpuinfo_uarch_zen2:
++			return "Zen 2";
+ 		case cpuinfo_uarch_geode:
+ 			return "Geode";
+ 		case cpuinfo_uarch_bobcat:
+@@ -157,6 +165,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Cortex-A55";
+ 		case cpuinfo_uarch_cortex_a57:
+ 			return "Cortex-A57";
++		case cpuinfo_uarch_cortex_a65:
++			return "Cortex-A65";
+ 		case cpuinfo_uarch_cortex_a72:
+ 			return "Cortex-A72";
+ 		case cpuinfo_uarch_cortex_a73:
+@@ -165,6 +175,10 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Cortex-A75";
+ 		case cpuinfo_uarch_cortex_a76:
+ 			return "Cortex-A76";
++		case cpuinfo_uarch_cortex_a76ae:
++			return "Cortex-A76AE";
++		case cpuinfo_uarch_cortex_a77:
++			return "Cortex-A77";
+ 		case cpuinfo_uarch_scorpion:
+ 			return "Scorpion";
+ 		case cpuinfo_uarch_krait:
+@@ -181,12 +195,16 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
+ 			return "Denver 2";
+ 		case cpuinfo_uarch_carmel:
+ 			return "Carmel";
+-		case cpuinfo_uarch_mongoose_m1:
+-			return "Mongoose M1";
+-		case cpuinfo_uarch_mongoose_m2:
+-			return "Mongoose M2";
+-		case cpuinfo_uarch_meerkat_m3:
+-			return "Meerkat M3";
++		case cpuinfo_uarch_exynos_m1:
++			return "Exynos M1";
++		case cpuinfo_uarch_exynos_m2:
++			return "Exynos M2";
++		case cpuinfo_uarch_exynos_m3:
++			return "Exynos M3";
++		case cpuinfo_uarch_exynos_m4:
++			return "Exynos M4";
++		case cpuinfo_uarch_exynos_m5:
++			return "Exynos M5";
+ 		case cpuinfo_uarch_swift:
+ 			return "Swift";
+ 		case cpuinfo_uarch_cyclone:
+@@ -258,13 +276,23 @@ int main(int argc, char** argv) {
+ 			printf(", %s %s\n", vendor_string, uarch_string);
+ 		}
+ 	}
+-	printf("Logical processors:\n");
++	printf("Logical processors");
++  #if defined(__linux__)
++    printf(" (System ID)");
++  #endif
++  printf(":\n");
+ 	for (uint32_t i = 0; i < cpuinfo_get_processors_count(); i++) {
+ 		const struct cpuinfo_processor* processor = cpuinfo_get_processor(i);
++    printf("\t%"PRIu32"", i);
++
++    #if defined(__linux__)
++      printf(" (%"PRId32")", processor->linux_id);
++    #endif
++
+ 		#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+-			printf("\t%"PRIu32": APIC ID 0x%08"PRIx32"\n", i, processor->apic_id);
++			printf(": APIC ID 0x%08"PRIx32"\n", processor->apic_id);
+ 		#else
+-			printf("\t%"PRIu32"\n", i);
++			printf("\n");
+ 		#endif
+ 	}
+ }
+diff --git tools/isa-info.c tools/isa-info.c
+index 594c46a..98ef919 100644
+--- tools/isa-info.c
++++ tools/isa-info.c
+@@ -67,6 +67,8 @@ int main(int argc, char** argv) {
+ 		printf("\tAVX512BITALG: %s\n", cpuinfo_has_x86_avx512bitalg() ? "yes" : "no");
+ 		printf("\tAVX512VPOPCNTDQ: %s\n", cpuinfo_has_x86_avx512vpopcntdq() ? "yes" : "no");
+ 		printf("\tAVX512VNNI: %s\n", cpuinfo_has_x86_avx512vnni() ? "yes" : "no");
++		printf("\tAVX512BF16: %s\n", cpuinfo_has_x86_avx512bf16() ? "yes" : "no");
++		printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
+ 		printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
+ 		printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");
+