#include <stdint.h>

#include <cpuinfo.h>
#include <log.h>
#include <arm/api.h>
#include <arm/midr.h>

void cpuinfo_arm_decode_cache(
	enum cpuinfo_uarch uarch,
	uint32_t cluster_cores,
	uint32_t midr,
	const struct cpuinfo_arm_chipset chipset[restrict static 1],
	uint32_t cluster_id,
	uint32_t arch_version,
	struct cpuinfo_cache l1i[restrict static 1],
	struct cpuinfo_cache l1d[restrict static 1],
	struct cpuinfo_cache l2[restrict static 1])
{
	switch (uarch) {
		case cpuinfo_uarch_xscale:
			switch (midr_get_part(midr) >> 8) {
				case 2:
					/*
					 * PXA 210/25X/26X
					 *
					 * See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface"
					 *     by David A. Patterson, John L. Hennessy
					 */
					*l1i = (struct cpuinfo_cache) {
						.size = 16 * 1024,
						.associativity = 32,
						.line_size = 32
					};
					*l1d = (struct cpuinfo_cache) {
						.size = 16 * 1024,
						.associativity = 4,
						.line_size = 64
					};
					break;
				case 4:
					/* PXA 27X */
					*l1i = (struct cpuinfo_cache) {
						.size = 32 * 1024,
						.associativity = 32,
						.line_size = 32
					};
					*l1d = (struct cpuinfo_cache) {
						.size = 32 * 1024,
						.associativity = 32,
						.line_size = 32
					};
					break;
				case 6:
					/*
					 * PXA 3XX
					 *
					 * See http://download.intel.com/design/intelxscale/31628302.pdf
					 */
					*l1i = (struct cpuinfo_cache) {
						.size = 32 * 1024,
						.associativity = 4,
						.line_size = 32
					};
					*l1d = (struct cpuinfo_cache) {
						.size = 32 * 1024,
						.associativity = 4,
						.line_size = 32
					};
					*l2 = (struct cpuinfo_cache) {
						.size = 256 * 1024,
						.associativity = 8,
						.line_size = 32
					};
					break;
			}
			break;
		case cpuinfo_uarch_arm11:
			*l1i = (struct cpuinfo_cache) {
				.size = 16 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 16 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			break;
		case cpuinfo_uarch_cortex_a5:
			/*
			 * Cortex-A5 Technical Reference Manual:
			 * 7.1.1. Memory system
			 *   The Cortex-A5 processor has separate instruction and data caches.
			 *   The caches have the following features:
			 *    - Data cache is 4-way set-associative.
			 *    - Instruction cache is 2-way set-associative.
			 *    - The cache line length is eight words.
			 *    - You can configure the instruction and data caches independently during implementation
			 *      to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB.
			 * 1.1.3. System design components
			 *    PrimeCell Level 2 Cache Controller (PL310)
			 *      The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a
			 *      recognized method of improving the performance of ARM-based systems when significant memory traffic
			 *      is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external
			 *      memory accesses and has been optimized for use with the Cortex-A5 processor.
			 * 8.1.7. Exclusive L2 cache
			 *    The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
			 *    This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. 
			 *
			 *  +--------------------+-----------+-----------+----------+-----------+
			 *  | Processor model    | L1D cache | L1I cache | L2 cache | Reference |
			 *  +--------------------+-----------+-----------+----------+-----------+
			 *  | Qualcomm MSM7225A  |           |           |          |           |
			 *  | Qualcomm MSM7625A  |           |           |          |           |
			 *  | Qualcomm MSM7227A  |           |           |          |           |
			 *  | Qualcomm MSM7627A  |    32K    |    32K    |   256K   | Wiki [1]  |
			 *  | Qualcomm MSM7225AB |           |           |          |           |
			 *  | Qualcomm MSM7625AB |           |           |          |           |
			 *  | Qualcomm QSD8250   |           |           |          |           |
			 *  | Qualcomm QSD8650   |           |           |          |           |
			 *  +--------------------+-----------+-----------+----------+-----------+
			 *  | Spreadtrum SC6821  |    32K    |    32K    |    ?     |           |
			 *  | Spreadtrum SC6825  |    32K    |    32K    |   256K   | Wiki [2]  |
			 *  | Spreadtrum SC8810  |     ?     |     ?     |    ?     |           |
			 *  | Spreadtrum SC8825  |    32K    |    32K    |    ?     |           |
			 *  +--------------------+-----------+-----------+----------+-----------+
			 *
			 * [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1
			 * [2] https://en.wikipedia.org/wiki/Spreadtrum
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 2,
				.line_size = 32
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 256 * 1024,
				/*
				 * Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size"
				 * Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf
				 */
				.associativity = 8,
				.line_size = 32
			};
			break;
		case cpuinfo_uarch_cortex_a7:
			/*
			 * Cortex-A7 MPCore Technical Reference Manual:
			 * 6.1. About the L1 memory system
			 *   The L1 memory system consists of separate instruction and data caches. You can configure the
			 *   instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
			 *
			 *   The L1 instruction memory system has the following features:
			 *    - Instruction side cache line length of 32-bytes.
			 *    - 2-way set-associative instruction cache.
			 *
			 *   The L1 data memory system has the following features:
			 *    - Data side cache line length of 64-bytes.
			 *    - 4-way set-associative data cache.
			 *
			 * 7.1. About the L2 Memory system
			 *   The L2 memory system consists of an:
			 *    - Optional tightly-coupled L2 cache that includes:
			 *      - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
			 *      - Fixed line length of 64 bytes
			 *      - 8-way set-associative cache structure
			 *
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Allwinner A20      |   2   |    32K    |    32K    |   256K    |    [1]    |
			 *  | Allwinner A23      |   2   |    32K    |    32K    |   256K    |    [2]    |
			 *  | Allwinner A31      |   4   |    32K    |    32K    |    1M     |    [3]    |
			 *  | Allwinner A31s     |   4   |    32K    |    32K    |    1M     |    [4]    |
			 *  | Allwinner A33      |   4   |    32K    |    32K    |   512K    |    [5]    |
			 *  | Allwinner A80 Octa | 4(+4) |    32K    |    32K    | 512K(+2M) |    [6]    |
			 *  | Allwinner A83T     |   8   |    32K    |    32K    |    1M     |    [7]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Broadcom BCM2836   |   4   |    32K    |    32K    |    512K   |    [8]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] https://linux-sunxi.org/A20
			 * [2] https://linux-sunxi.org/A23
			 * [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
			 * [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf
			 * [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf
			 * [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf
			 * [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
			 * [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 2,
				.line_size = 32
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 128 * 1024 * cluster_cores,
				.associativity = 8,
				.line_size = 64
			};
			break;
		case cpuinfo_uarch_cortex_a8:
			/*
			 * Cortex-A8 Technical Reference Manual:
			 * 7.1. About the L1 memory system
			 *    The L1 memory system consists of separate instruction and data caches in a Harvard arrangement.
			 *    The L1 memory system provides the core with:
			 *     - fixed line length of 64 bytes
			 *     - support for 16KB or 32KB caches
			 *     - 4-way set associative cache structure
			 * 8.1. About the L2 memory system
			 *    The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache.
			 *    The key features of the L2 memory system include:
			 *     - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
			 *     - fixed line length of 64 bytes
			 *     - 8-way set associative cache structure
			 *
			 *  +----------------------+-----------+-----------+-----------+-----------+
			 *  | Processor model      | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +----------------------+-----------+-----------+-----------+-----------+
			 *  | Exynos 3 Single 3110 |    32K    |    32K    |   512K    |    [1]    |
			 *  +----------------------+-----------+-----------+-----------+-----------+
			 *  | TI DM 3730           |    32K    |    32K    |   256K    |    [2]    |
			 *  +----------------------+-----------+-----------+-----------+-----------+
			 *
			 * [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf
			 * [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.associativity = 8,
				.line_size = 64
			};
			switch (chipset->vendor) {
				case cpuinfo_arm_chipset_vendor_samsung:
					l2->size = 512 * 1024;
					break;
				default:
					l2->size = 256 * 1024;
					break;
			}

			break;
		case cpuinfo_uarch_cortex_a9:
			/*
			 * ARM Cortex‑A9 Technical Reference Manual:
			 * 7.1.1 Memory system
			 *    The Cortex‑A9 processor has separate instruction and data caches.
			 *    The caches have the following features:
			 *     - Both caches are 4-way set-associative.
			 *     - The cache line length is eight words.
			 *     - You can configure the instruction and data caches independently during implementation
			 *       to sizes of 16KB, 32KB, or 64KB.
			 * 8.1.5 Exclusive L2 cache
			 *    The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode.
			 *    This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller.
			 *
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Exynos 4 Dual 4210 |   2   |    32K    |    32K    |    1M     |    [1]    |
			 *  | Exynos 4 Dual 4212 |   2   |    32K    |    32K    |    1M     |    [2]    |
			 *  | Exynos 4 Quad 4412 |   4   |    32K    |    32K    |    1M     |    [3]    |
			 *  | Exynos 4 Quad 4415 |   4   |    32K    |    32K    |    1M     |           |
			 *  | TI OMAP 4430       |   2   |    32K    |    32K    |    1M     |    [4]    |
			 *  | TI OMAP 4460       |   2   |    32K    |    32K    |    1M     |    [5]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf
			 * [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf
			 * [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf
			 * [4] https://www.hotchips.org/wp-content/uploads/hc_archives/hc21/2_mon/HC21.24.400.ClientProcessors-Epub/HC21.24.421.Witt-OMAP4430.pdf
			 * [5] http://www.anandtech.com/show/5310/samsung-galaxy-nexus-ice-cream-sandwich-review/9
			 */

			/* Use Exynos 4 specs */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 1024 * 1024,
				/* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */
				.associativity = 16,
				.line_size = 32
			};
			break;
		case cpuinfo_uarch_cortex_a15:
			/*
			 * 6.1. About the L1 memory system
			 *    The L1 memory system consists of separate instruction and data caches.
			 *    The L1 instruction memory system has the following features:
			 *     - 32KB 2-way set-associative instruction cache.
			 *     - Fixed line length of 64 bytes.
			 *    The L1 data memory system has the following features:
			 *     - 32KB 2-way set-associative data cache.
			 *     - Fixed line length of 64 bytes.
			 * 7.1. About the L2 memory system
			 *    The features of the L2 memory system include:
			 *     - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
			 *     - Fixed line length of 64 bytes.
			 *     - 16-way set-associative cache structure.
			 *
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Exynos 5 Dual 5250 |   2   |    32K    |    32K    |    1M     |    [1]    |
			 *  | Exynos 5 Hexa 5260 | 2(+4) |    32K    |    32K    | 1M(+512K) |    [2]    |
			 *  | Exynos 5 Octa 5410 | 4(+4) |    32K    |    32K    | 2M(+512K) |    [3]    |
			 *  | Exynos 5 Octa 5420 | 4(+4) |    32K    |    32K    | 2M(+512K) |    [3]    |
			 *  | Exynos 5 Octa 5422 | 4(+4) |    32K    |    32K    | 2M(+512K) |    [3]    |
			 *  | Exynos 5 Octa 5430 | 4(+4) |    32K    |    32K    | 2M(+512K) |    [3]    |
			 *  | Exynos 5 Octa 5800 | 4(+4) |    32K    |    32K    | 2M(+512K) |    [3]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf
			 * [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf
			 * [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 2,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 2,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = cluster_cores * 512 * 1024,
				.associativity = 16,
				.line_size = 64
			};
			break;
		case cpuinfo_uarch_cortex_a17:
			/*
			 * ARM Cortex-A17 MPCore Processor Technical Reference Manual:
			 * 6.1. About the L1 memory system
			 *    The L1 memory system consists of separate instruction and data caches.
			 *    The size of the instruction cache is implemented as either 32KB or 64KB.
			 *    The size of the data cache is 32KB.
			 *
			 *    The L1 instruction cache has the following features:
			 *     - Instruction side cache line length of 64-bytes.
			 *     - 4-way set-associative instruction cache.
			 *
			 *    The L1 data cache has the following features:
			 *     - Data side cache line length of 64-bytes.
			 *     - 4-way set-associative data cache.
			 *
			 * 7.1. About the L2 Memory system
			 *    An integrated L2 cache:
			 *     - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB.
			 *     - A fixed line length of 64 bytes.
			 *     - 16-way set-associative cache structure.
			 *
			 *  +------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model  | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +------------------+-------+-----------+-----------+-----------+-----------+
			 *  | MediaTek MT6595  | 4(+4) |    32K    |    32K    | 2M(+512K) |    [1]    |
			 *  +------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] https://blog.osakana.net/archives/5268
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = cluster_cores * 512 * 1024,
				.associativity = 16,
				.line_size = 64
			};
			break;
		case cpuinfo_uarch_cortex_a35:
			/*
			 * ARM Cortex‑A35 Processor Technical Reference Manual:
			 * 6.1. About the L1 memory system
			 *   The L1 memory system includes several power-saving and performance-enhancing features.
			 *   These include separate instruction and data caches, which can be configured
			 *   independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
			 *
			 *   L1 instruction-side memory system
			 *     A dedicated instruction cache that:
			 *      - is virtually indexed and physically tagged.
			 *      - is 2-way set associative.
			 *      - is configurable to be 8KB, 16KB, 32KB, or 64KB.
			 *      - uses a cache line length of 64 bytes.
			 *
			 *   L1 data-side memory system
			 *     A dedicated data cache that:
			 *      - is physically indexed and physically tagged.
			 *      - is 4-way set associative.
			 *      - is configurable to be 8KB, 16KB, 32KB, or 64KB.
			 *      - uses a cache line length of 64 bytes.
			 *
			 * 7.1. About the L2 memory system
			 *   The L2 cache is 8-way set associative.
			 *   Further features of the L2 cache are:
			 *    - Configurable size of 128KB, 256KB, 512KB, and 1MB.
			 *    - Fixed line length of 64 bytes.
			 *    - Physically indexed and tagged.
			 *
			 *  +-----------------+---------+-----------+-----------+-----------+-----------+
			 *  | Processor model |  Cores  | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +-----------------+---------+-----------+-----------+-----------+-----------+
			 *  | MediaTek MT6599 | 4(+4+2) |     ?     |     ?     |     ?     |           |
			 *  +-----------------+---------+-----------+-----------+-----------+-----------+
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 16 * 1024, /* assumption based on low-end Cortex-A53 */
				.associativity = 2,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 16 * 1024, /* assumption based on low-end Cortex-A53 */
				.associativity = 4,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 256 * 1024, /* assumption based on low-end Cortex-A53 */
				.associativity = 8,
				.line_size = 64
			};
			break;
		case cpuinfo_uarch_cortex_a53:
			/*
			 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
			 * 6.1. About the L1 memory system
			 *   The L1 memory system consists of separate instruction and data caches. The implementer configures the
			 *   instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB.
			 *
			 *   The L1 Instruction memory system has the following key features:
			 *    - Instruction side cache line length of 64 bytes.
			 *    - 2-way set associative L1 Instruction cache.
			 *
			 *   The L1 Data memory system has the following features:
			 *    - Data side cache line length of 64 bytes.
			 *    - 4-way set associative L1 Data cache.
			 *
			 * 7.1. About the L2 memory system
			 *   The L2 memory system consists of an:
			 *    - Optional tightly-coupled L2 cache that includes:
			 *      - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
			 *      - Fixed line length of 64 bytes.
			 *      - 16-way set-associative cache structure.
			 *
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Broadcom BCM2837   |   4   |    16K    |    16K    |    512K   |    [1]    |
			 *  | Exynos 7420        | 4(+4) |    32K    |    32K    |    256K   |  [2, 3]   |
			 *  | Exynos 8890        | 4(+4) |    32K    |    32K    |    256K   |    [4]    |
			 *  | Snapdragon 410     |   4   |    32K    |    32K    |    512K   |    [3]    |
			 *  | Snapdragon 835     | 4(+4) |  32K+64K  |  32K+64K  |  1M(+2M)  |   sysfs   |
			 *  | Kirin 620          |  4+4  |    32K    |    32K    |    512K   |    [5]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766
			 * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2
			 * [3] https://www.usenix.org/system/files/conference/usenixsecurity16/sec16_paper_lipp.pdf
			 * [4] http://www.boardset.com/products/products_v8890.php
			 * [5] http://mirror.lemaker.org/Hi6220V100_Multi-Mode_Application_Processor_Function_Description.pdf
			 */
			if (midr_is_qualcomm_cortex_a53_silver(midr)) {
				/* Qualcomm-modified Cortex-A53 in Snapdragon 630/660/835 */

				uint32_t l2_size = 512 * 1024;
				if (chipset->series == cpuinfo_arm_chipset_series_qualcomm_msm && chipset->model == 8998) {
					/* Snapdragon 835 (MSM8998): 1 MB L2 (little cores only) */
					l2_size = 1024 * 1024;
				} else if (chipset->series == cpuinfo_arm_chipset_series_qualcomm_snapdragon && chipset->model == 630 && cluster_id == 0) {
					/* Snapdragon 630: 1 MB L2 for the big cores (cluster 0) */
					l2_size = 1024 * 1024;
				}

				*l1i = (struct cpuinfo_cache) {
					.size = 32 * 1024,
					.associativity = 2,
					.line_size = 64
				};
				*l1d = (struct cpuinfo_cache) {
					.size = 32 * 1024,
					.associativity = 4,
					.line_size = 64
				};
				*l2 = (struct cpuinfo_cache) {
					.size = l2_size,
					.associativity = 16,
					.line_size = 64
				};
			} else {
				/* Standard Cortex-A53 */

				/* Use conservative values by default */
				uint32_t l1_size = 16 * 1024;
				uint32_t l2_size = 256 * 1024;
				switch (chipset->series) {
					case cpuinfo_arm_chipset_series_qualcomm_msm:
						l1_size = 32 * 1024;
						l2_size = 512 * 1024;
						switch (chipset->model) {
							case 8937: /* Snapdragon 430 */
							case 8940: /* Snapdragon 435 */
							case 8953: /* Snapdragon 625 or 626 (8953PRO) */
								if (cluster_id == 0) {
									/* 1M L2 for big cluster */
									l2_size = 1024 * 1024;
								}
								break;
							case 8952: /* Snapdragon 617 */
								if (cluster_id != 0) {
									/* 256K L2 for LITTLE cluster */
									l2_size = 256 * 1024;
								}
								break;
							default:
								/* Other MSM models keep the 32K L1 / 512K L2 values set above */
								break;
						}
						break;
					case cpuinfo_arm_chipset_series_qualcomm_apq:
						l1_size = 32 * 1024;
						l2_size = 512 * 1024;
						break;
					case cpuinfo_arm_chipset_series_qualcomm_snapdragon:
						l1_size = 32 * 1024;
						l2_size = 512 * 1024;
						if (chipset->model == 450 && cluster_id == 0) {
							/* Snapdragon 450: 1M L2 for big cluster */
							l2_size = 1024 * 1024;
						}
						break;
					case cpuinfo_arm_chipset_series_hisilicon_hi:
						l1_size = 32 * 1024;
						l2_size = 512 * 1024;
						break;
					case cpuinfo_arm_chipset_series_hisilicon_kirin:
						l1_size = 32 * 1024;
						switch (chipset->model) {
							case 970: /* Kirin 970 */
								l2_size = 1024 * 1024;
								break;
							default:
								l2_size = 512 * 1024;
								break;
						}
						break;
					case cpuinfo_arm_chipset_series_samsung_exynos:
						l1_size = 32 * 1024;
						break;
					default:
						/* Silence compiler warning about unhandled enum values */
						break;
				}
				*l1i = (struct cpuinfo_cache) {
					.size = l1_size,
					.associativity = 2,
					.line_size = 64
				};
				*l1d = (struct cpuinfo_cache) {
					.size = l1_size,
					.associativity = 4,
					.line_size = 64
				};
				*l2 = (struct cpuinfo_cache) {
					.size = l2_size,
					.associativity = 16,
					.line_size = 64
				};
			}
			break;
		case cpuinfo_uarch_cortex_a57:
			/*
			 * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
			 * 6.1. About the L1 memory system
			 *   The L1 memory system consists of separate instruction and data caches.
			 *
			 *   The L1 instruction memory system has the following features:
			 *    - 48KB 3-way set-associative instruction cache.
			 *    - Fixed line length of 64 bytes.
			 *
			 *   The L1 data memory system has the following features:
			 *    - 32KB 2-way set-associative data cache.
			 *    - Fixed line length of 64 bytes.
			 *
			 * 7.1 About the L2 memory system
			 *   The features of the L2 memory system include:
			 *    - Configurable L2 cache size of 512KB, 1MB, and 2MB.
			 *    - Fixed line length of 64 bytes.
			 *    - 16-way set-associative cache structure.
			 *    - Inclusion property with L1 data caches.
			 *
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Snapdragon 810     | 4(+4) |    32K    |    48K    |    2M     |    [1]    |
			 *  | Exynos 7420        | 4(+4) |    32K    |    48K    |    2M     |    [2]    |
			 *  | Jetson TX1         |   4   |    32K    |    48K    |    2M     |    [3]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview
			 * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2
			 * [3] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 48 * 1024,
				.associativity = 3,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 2,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = cluster_cores * 512 * 1024,
				.associativity = 16,
				.line_size = 64,
				.flags = CPUINFO_CACHE_INCLUSIVE
			};
			break;
		case cpuinfo_uarch_cortex_a72:
		{
			/*
			 * ARM Cortex-A72 MPCore Processor Technical Reference Manual
			 * 6.1. About the L1 memory system
			 *   The L1 memory system consists of separate instruction and data caches.
			 *
			 *   The L1 instruction memory system has the following features:
			 *    - 48KB 3-way set-associative instruction cache.
			 *    - Fixed line length of 64 bytes.
			 *
			 *   The L1 data memory system has the following features:
			 *    - 32KB 2-way set-associative data cache.
			 *    - Fixed cache line length of 64 bytes.
			 *
			 * 7.1 About the L2 memory system
			 *   The features of the L2 memory system include:
			 *    - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
			 *    - Fixed line length of 64 bytes.
			 *    - Banked pipeline structures.
			 *    - Inclusion property with L1 data caches.
			 *    - 16-way set-associative cache structure.
			 *
			 *  +---------------------+---------+-----------+-----------+------------+-----------+
			 *  | Processor model     | Cores   | L1D cache | L1I cache | L2 cache   | Reference |
			 *  +---------------------+---------+-----------+-----------+------------+-----------+
			 *  | Snapdragon 650      |  2(+4)  | 32K(+32K) | 48K(+32K) |  1M(+512K) |    [1]    |
			 *  | Snapdragon 652      |  4(+4)  | 32K(+32K) | 48K(+32K) |  1M(+512K) |    [2]    |
			 *  | Snapdragon 653      |  4(+4)  | 32K(+32K) | 48K(+32K) |  1M(+512K) |    [3]    |
			 *  | HiSilicon Kirin 950 |  4(+4)  |  32K+32K  |  48K+32K  |     ?      |           |
			 *  | HiSilicon Kirin 955 |  4(+4)  |  32K+32K  |  48K+32K  |     ?      |           |
			 *  | MediaTek Helio X20  | 2(+4+4) |     ?     |     ?     |     ?      |           |
			 *  | MediaTek Helio X23  | 2(+4+4) |     ?     |     ?     |     ?      |           |
			 *  | MediaTek Helio X25  | 2(+4+4) |     ?     |     ?     |     ?      |           |
			 *  | MediaTek Helio X27  | 2(+4+4) |     ?     |     ?     |     ?      |           |
			 *  +---------------------+---------+-----------+-----------+------------+-----------+
			 *
			 * [1] http://pdadb.net/index.php?m=processor&id=578&c=qualcomm_snapdragon_618_msm8956__snapdragon_650
			 * [2] http://pdadb.net/index.php?m=processor&id=667&c=qualcomm_snapdragon_620_apq8076__snapdragon_652
			 * [3] http://pdadb.net/index.php?m=processor&id=692&c=qualcomm_snapdragon_653_msm8976sg__msm8976_pro
			 */
			uint32_t l2_size;
			switch (chipset->series) {
				case cpuinfo_arm_chipset_series_hisilicon_kirin:
					l2_size = 2 * 1024 * 1024;
					break;
				default:
					l2_size = 1024 * 1024;
					break;
			}

			*l1i = (struct cpuinfo_cache) {
				.size = 48 * 1024,
				.associativity = 3,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 2,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = l2_size,
				.associativity = 16,
				.line_size = 64,
				.flags = CPUINFO_CACHE_INCLUSIVE
			};
			break;
		}
		case cpuinfo_uarch_cortex_a73:
		{
			/*
			 * ARM Cortex‑A73 MPCore Processor Technical Reference Manual
			 * 6.1. About the L1 memory system
			 *   The L1 memory system consists of separate instruction and data caches.
			 *   The size of the instruction cache is 64KB.
			 *   The size of the data cache is configurable to either 32KB or 64KB.
			 *
			 *   The L1 instruction memory system has the following key features:
			 *    - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache.
			 *    - Fixed cache line length of 64 bytes.
			 *
			 *   The L1 data memory system has the following features:
			 *    - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations)
			 *      and a 16-way set associative PIPT cache (for 64KB configurations).
			 *    - Fixed cache line length of 64 bytes.
			 *
			 * 7.1 About the L2 memory system
			 *   The L2 memory system consists of:
			 *    - A tightly-integrated L2 cache with:
			 *      - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
			 *      - A 16-way, set-associative structure.
			 *      - A fixed line length of 64 bytes.
			 *
			 * The ARM Cortex A73 - Artemis Unveiled [1]
			 *   "ARM still envisions that most vendors will choose to use configurations of 1 to
			 *    2MB in consumer products. The L2 cache is inclusive of the L1 cache. "
			 *
			 *  +---------------------+---------+-----------+-----------+-----------+-----------+
			 *  | Processor model     | Cores   | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +---------------------+---------+-----------+-----------+-----------+-----------+
			 *  | HiSilicon Kirin 960 | 4(+4)   |  64K+32K  |  64K+32K  |     ?     |    [2]    |
			 *  | MediaTek Helio X30  | 2(+4+4) |     ?     |  64K+ ?   |     ?     |           |
			 *  | Snapdragon 835      | 4(+4)   |  64K+32K  |  64K+32K  |  2M(+1M)  |   sysfs   |
			 *  | Snapdragon 660      | 4(+4)   |  64K+32K  |  64K+32K  |  2M(+1M)  |    [3]    |
			 *  +---------------------+---------+-----------+-----------+-----------+-----------+
			 *
			 * [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2
			 * [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3
			 * [3] https://arstechnica.com/gadgets/2017/05/qualcomms-snapdragon-660-and-630-bring-more-high-end-features-to-midrange-chips/
			 */
			uint32_t l2_size = 1024 * 1024;
			switch (chipset->series) {
				case cpuinfo_arm_chipset_series_hisilicon_kirin:
					l2_size = 2 * 1024 * 1024;
					break;
				default:
					switch (midr) {
						case UINT32_C(0x51AF8001): /* Kryo 280 Gold */
							l2_size = 2 * 1024 * 1024;
							break;
						case UINT32_C(0x51AF8002): /* Kryo 260 Gold */
						default:
							break;
					}
			}

			*l1i = (struct cpuinfo_cache) {
				.size = 64 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 64 * 1024,
				.associativity = 16,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = l2_size,
				.associativity = 16,
				.line_size = 64,
				.flags = CPUINFO_CACHE_INCLUSIVE
			};
			break;
		}
		case cpuinfo_uarch_scorpion:
			/*
			 * - "The CPU includes 32KB instruction and data caches as
			 *    well as a complete memory-management unit (MMU) suitable
			 *    for high-level operating systems. The CPU also has
			 *    256KB of SRAM that can be allocated in 64KB increments
			 *    to level-two (L2) cache or tightly coupled memory (TCM)." [1]
			 *    We interpret it as L2 cache being 4-way set-associative on single-core Scorpion.
			 * - L1 Data Cache = 32 KB. 32 B/line. [2]
			 * - L2 Cache = 256 KB. 128 B/line. [2]
			 * - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3]
			 * - Single or dual-core configuration [3]
			 * - For L1 cache assume the same associativity as Krait
			 *
			 * [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf
			 * [2] http://www.7-cpu.com/cpu/Snapdragon.html
			 * [3] https://en.wikipedia.org/wiki/Scorpion_(CPU)
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4,
				.line_size = 32
			};
			*l2 = (struct cpuinfo_cache) {
				.size = cluster_cores * 256 * 1024,
				.associativity = 4,
				.line_size = 128
			};
			break;
		case cpuinfo_uarch_krait:
			/*
			 * - L0 Data cache = 4 KB. 64 B/line, direct mapped [1]
			 * - L0 Instruction cache = 4 KB. [1]
			 * - L1 Data cache = 16 KB. 64 B/line, 4-way [1]
			 * - L1 Instruction cache = 16 KB, 4-way [1]
			 * - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1]
			 * - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2]
			 *
			 * [1] http://www.7-cpu.com/cpu/Krait.html
			 * [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 16 * 1024,
				.associativity = 4,
				.line_size = 64 /* assume same as L1D */
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 16 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = cluster_cores * 512 * 1024,
				.associativity = 8,
				.line_size = 128
			};
			break;
		case cpuinfo_uarch_kryo:
			/*
			 *  +-----------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +-----------------+-------+-----------+-----------+-----------+-----------+
			 *  | Snapdragon 820  |  2+2  |     ?     |     ?     |  1M+512K  |    [1]    |
			 *  | Snapdragon 821  |  2+2  |     ?     |     ?     |  1M+512K  |    [1]    |
			 *  +-----------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 32 * 1024 /* TODO: verify */,
				.associativity = 4,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 24 * 1024 /* TODO: verify */,
				.associativity = 3,
				.line_size = 64
			};
			if (midr_is_kryo_silver(midr)) {
				/* Kryo "Silver" */
				*l2 = (struct cpuinfo_cache) {
					.size = 512 * 1024,
					.associativity = 8,
					.line_size = 128
				};
			} else {
				/* Kryo "Gold" */
				*l2 = (struct cpuinfo_cache) {
					.size = 1024 * 1024,
					.associativity = 8,
					.line_size = 128
				};
			}
			break;
		case cpuinfo_uarch_denver:
			/*
			 * The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 1 data cache,
			 * and a 2MB, 16-way level 2 cache, all of which can service both cores. [1]
			 *
			 * All the caches have 64-byte lines. [2]
			 *
			 * [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html
			 * [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 128 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 64 * 1024,
				.associativity = 4,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 2 * 1024 * 1024,
				.associativity = 16,
				.line_size = 64
			};
			break;
		case cpuinfo_uarch_mongoose:
			/*
			 * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
			 *    namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
			 * - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1]
			 * - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split
			 *    into 4 banks and has a 22 cycle latency" [1]
			 *
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Processor model    | Cores | L1D cache | L1I cache | L2 cache  | Reference |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *  | Exynos 8 Octa 8890 | 4(+4) |    32K    |    64K    |    2M     |    [1]    |
			 *  | Exynos 8 Octa 8895 | 4(+4) |    32K    |    64K    |    2M     |    [2]    |
			 *  +--------------------+-------+-----------+-----------+-----------+-----------+
			 *
			 * [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed
			 * [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 64 * 1024,
				.associativity = 4,
				.line_size = 128
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 8,
				.line_size = 64
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 2 * 1024 * 1024,
				.associativity = 16,
				.line_size = 64
			};
			break;
		case cpuinfo_uarch_thunderx:
			/*
			 * "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1]
			 *
			 * [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf
			 */
			*l1i = (struct cpuinfo_cache) {
				.size = 78 * 1024,
				.associativity = 4 /* assumption */,
				.line_size = 64 /* assumption */
			};
			*l1d = (struct cpuinfo_cache) {
				.size = 32 * 1024,
				.associativity = 4 /* assumption */,
				.line_size = 64 /* assumption */
			};
			*l2 = (struct cpuinfo_cache) {
				.size = 16 * 1024 * 1024,
				.associativity = 8 /* assumption */,
				.line_size = 64 /* assumption */
			};
			break;
		case cpuinfo_uarch_cortex_a12:
		case cpuinfo_uarch_cortex_a32:
		default:
			cpuinfo_log_warning("target uarch not recognized; using generic cache parameters");
			/* Follow OpenBLAS */
			if (arch_version >= 8) {
				*l1i = (struct cpuinfo_cache) {
					.size = 32 * 1024,
					.associativity = 4,
					.line_size = 64
				};
				*l1d = (struct cpuinfo_cache) {
					.size = 32 * 1024,
					.associativity = 4,
					.line_size = 64
				};
				*l2 = (struct cpuinfo_cache) {
					.size = cluster_cores * 256 * 1024,
					.associativity = 8,
					.line_size = 64
				};
			} else {
				*l1i = (struct cpuinfo_cache) {
					.size = 16 * 1024,
					.associativity = 4,
					.line_size = 32
				};
				*l1d = (struct cpuinfo_cache) {
					.size = 16 * 1024,
					.associativity = 4,
					.line_size = 32
				};
				if (arch_version >= 7) {
					*l2 = (struct cpuinfo_cache) {
						.size = cluster_cores * 128 * 1024,
						.associativity = 8,
						.line_size = 32
					};
				}
			}
			break;
	}
	l1i->sets = l1i->size / (l1i->associativity * l1i->line_size);
	l1i->partitions = 1;
	l1d->sets = l1d->size / (l1d->associativity * l1d->line_size);
	l1d->partitions = 1;
	if (l2->size != 0) {
		l2->sets = l2->size / (l2->associativity * l2->line_size);
		l2->partitions = 1;
	}
}
