Blame - src/arm/cache.c - platform/external/cpuinfo

blob: b4d86305db8f7a14d0002438ee9c035d7323b9c0 [file] [log] [blame]

Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	1	#include <stdint.h>
				2
				3	#include <cpuinfo.h>
				4	#include <log.h>
				5	#include <arm/api.h>
				6
				7
				8	void cpuinfo_arm_decode_cache(
				9	enum cpuinfo_uarch uarch,
				10	uint32_t uarch_cores,
				11	uint32_t cpu_part,
				12	uint32_t arch_version,
				13	struct cpuinfo_cache l1i[restrict static 1],
				14	struct cpuinfo_cache l1d[restrict static 1],
				15	struct cpuinfo_cache l2[restrict static 1])
				16	{
				17	switch (uarch) {
				18	case cpuinfo_uarch_xscale:
				19	switch (cpu_part >> 8) {
				20	case 2:
				21	/*
				22	* PXA 210/25X/26X
				23	*
				24	* See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface"
				25	* by David A. Patterson, John L. Hennessy
				26	*/
				27	*l1i = (struct cpuinfo_cache) {
				28	.size = 16 * 1024,
				29	.associativity = 32,
				30	.line_size = 32
				31	};
				32	*l1d = (struct cpuinfo_cache) {
				33	.size = 16 * 1024,
				34	.associativity = 4,
				35	.line_size = 64
				36	};
				37	break;
				38	case 4:
				39	/* PXA 27X */
				40	*l1i = (struct cpuinfo_cache) {
				41	.size = 32 * 1024,
				42	.associativity = 32,
				43	.line_size = 32
				44	};
				45	*l1d = (struct cpuinfo_cache) {
				46	.size = 32 * 1024,
				47	.associativity = 32,
				48	.line_size = 32
				49	};
				50	break;
				51	case 6:
				52	/*
				53	* PXA 3XX
				54	*
				55	* See http://download.intel.com/design/intelxscale/31628302.pdf
				56	*/
				57	*l1i = (struct cpuinfo_cache) {
				58	.size = 32 * 1024,
				59	.associativity = 4,
				60	.line_size = 32
				61	};
				62	*l1d = (struct cpuinfo_cache) {
				63	.size = 32 * 1024,
				64	.associativity = 4,
				65	.line_size = 32
				66	};
				67	*l2 = (struct cpuinfo_cache) {
				68	.size = 256 * 1024,
				69	.associativity = 8,
				70	.line_size = 32
				71	};
				72	break;
				73	}
				74	break;
				75	case cpuinfo_uarch_arm11:
				76	*l1i = (struct cpuinfo_cache) {
				77	.size = 16 * 1024,
				78	.associativity = 4,
				79	.line_size = 32
				80	};
				81	*l1d = (struct cpuinfo_cache) {
				82	.size = 16 * 1024,
				83	.associativity = 4,
				84	.line_size = 32
				85	};
				86	break;
				87	case cpuinfo_uarch_cortex_a5:
				88	/*
				89	* Cortex-A5 Technical Reference Manual:
				90	* 7.1.1. Memory system
				91	* The Cortex-A5 processor has separate instruction and data caches.
				92	* The caches have the following features:
				93	* - Data cache is 4-way set-associative.
				94	* - Instruction cache is 2-way set-associative.
				95	* - The cache line length is eight words.
				96	* - You can configure the instruction and data caches independently during implementation
				97	* to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB.
				98	* 1.1.3. System design components
				99	* PrimeCell Level 2 Cache Controller (PL310)
				100	* The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a
				101	* recognized method of improving the performance of ARM-based systems when significant memory traffic
				102	* is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external
				103	* memory accesses and has been optimized for use with the Cortex-A5 processor.
				104	* 8.1.7. Exclusive L2 cache
				105	* The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
				106	* This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
				107	*
				108	* +--------------------+-----------+-----------+----------+-----------+
				109	* \| Processor model \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				110	* +--------------------+-----------+-----------+----------+-----------+
				111	* \| Qualcomm MSM7225A \| \| \| \| \|
				112	* \| Qualcomm MSM7625A \| \| \| \| \|
				113	* \| Qualcomm MSM7227A \| \| \| \| \|
				114	* \| Qualcomm MSM7627A \| 32K \| 32K \| 256K \| Wiki [1] \|
				115	* \| Qualcomm MSM7225AB \| \| \| \| \|
				116	* \| Qualcomm MSM7225AB \| \| \| \| \|
				117	* \| Qualcomm QSD8250 \| \| \| \| \|
				118	* \| Qualcomm QSD8650 \| \| \| \| \|
				119	* +--------------------+-----------+-----------+----------+-----------+
				120	* \| Spreadtrum SC6821 \| 32K \| 32K \| ? \| \|
				121	* \| Spreadtrum SC6825 \| 32K \| 32K \| 256K \| Wiki [2] \|
				122	* \| Spreadtrum SC8810 \| ? \| ? \| ? \| \|
				123	* \| Spreadtrum SC8825 \| 32K \| 32K \| ? \| \|
				124	* +--------------------+-----------+-----------+----------+-----------+
				125	*
				126	* [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1
				127	* [2] https://en.wikipedia.org/wiki/Spreadtrum
				128	*/
				129	*l1i = (struct cpuinfo_cache) {
				130	.size = 32 * 1024,
				131	.associativity = 2,
				132	.line_size = 32
				133	};
				134	*l1d = (struct cpuinfo_cache) {
				135	.size = 32 * 1024,
				136	.associativity = 4,
				137	.line_size = 32
				138	};
				139	*l2 = (struct cpuinfo_cache) {
				140	.size = 256 * 1024,
				141	/*
				142	* Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size"
				143	* Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf
				144	*/
				145	.associativity = 8,
				146	.line_size = 32
				147	};
				148	break;
				149	case cpuinfo_uarch_cortex_a7:
				150	/*
				151	* Cortex-A7 MPCore Technical Reference Manual:
				152	* 6.1. About the L1 memory system
				153	* The L1 memory system consists of separate instruction and data caches. You can configure the
				154	* instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
				155	*
				156	* The L1 instruction memory system has the following features:
				157	* - Instruction side cache line length of 32-bytes.
				158	* - 2-way set-associative instruction cache.
				159	*
				160	* The L1 data memory system has the following features:
				161	* - Data side cache line length of 64-bytes.
				162	* - 4-way set-associative data cache.
				163	*
				164	* 7.1. About the L2 Memory system
				165	* The L2 memory system consists of an:
				166	* - Optional tightly-coupled L2 cache that includes:
				167	* - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
				168	*
				169	* +--------------------+-------+-----------+-----------+-----------+-----------+
				170	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				171	* +--------------------+-------+-----------+-----------+-----------+-----------+
				172	* \| Allwinner A20 \| 2 \| 32K \| 32K \| 256K \| [1] \|
				173	* \| Allwinner A23 \| 2 \| 32K \| 32K \| 256K \| [2] \|
				174	* \| Allwinner A31 \| 4 \| 32K \| 32K \| 1M \| [3] \|
				175	* \| Allwinner A31s \| 4 \| 32K \| 32K \| 1M \| [4] \|
				176	* \| Allwinner A33 \| 4 \| 32K \| 32K \| 512K \| [5] \|
				177	* \| Allwinner A80 Octa \| 4(+4) \| 32K \| 32K \| 512K(+2M) \| [6] \|
				178	* \| Allwinner A81T \| 8 \| 32K \| 32K \| 1M \| [7] \|
				179	* +--------------------+-------+-----------+-----------+-----------+-----------+
				180	* \| Broadcom BCM2836 \| 4 \| 32K \| 32K \| 512K \| [8] \|
				181	* +--------------------+-------+-----------+-----------+-----------+-----------+
				182	*
				183	* [1] https://linux-sunxi.org/A20
				184	* [2] https://linux-sunxi.org/A23
				185	* [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
				186	* [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf
				187	* [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf
				188	* [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf
				189	* [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
				190	* [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428
				191	*/
				192	*l1i = (struct cpuinfo_cache) {
				193	.size = 32 * 1024,
				194	.associativity = 2,
				195	.line_size = 32
				196	};
				197	*l1d = (struct cpuinfo_cache) {
				198	.size = 32 * 1024,
				199	.associativity = 4,
				200	.line_size = 64
				201	};
				202	*l2 = (struct cpuinfo_cache) {
				203	.size = 128 * 1024 * uarch_cores,
				204	.associativity = 8,
				205	.line_size = 64
				206	};
				207	break;
				208	case cpuinfo_uarch_cortex_a8:
				209	/*
				210	* Cortex-A8 Technical Reference Manual:
				211	* 7.1. About the L1 memory system
				212	* The L1 memory system consists of separate instruction and data caches in a Harvard arrangement.
				213	* The L1 memory system provides the core with:
				214	* - fixed line length of 64 bytes
				215	* - support for 16KB or 32KB caches
				216	* - 4-way set associative cache structure
				217	* 8.1. About the L2 memory system
				218	* The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache.
				219	* The key features of the L2 memory system include:
				220	* - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
				221	* - fixed line length of 64 bytes
				222	* - 8-way set associative cache structure
Marat Dukhan	6d99605	2017-05-08 11:31:57 +0000	[diff] [blame]	223	*
Marat Dukhan	419a819	2017-05-08 12:25:17 +0000	[diff] [blame]	224	* +----------------------+-----------+-----------+-----------+-----------+
				225	* \| Processor model \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				226	* +----------------------+-----------+-----------+-----------+-----------+
				227	* \| Exynos 3 Single 3110 \| 32K \| 32K \| 512K \| [1] \|
				228	* +----------------------+-----------+-----------+-----------+-----------+
				229	* \| TI DM 3730 \| 32K \| 32K \| 256K \| [2] \|
				230	* +----------------------+-----------+-----------+-----------+-----------+
Marat Dukhan	6d99605	2017-05-08 11:31:57 +0000	[diff] [blame]	231	*
Marat Dukhan	419a819	2017-05-08 12:25:17 +0000	[diff] [blame]	232	* [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf
				233	* [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	234	*/
				235	*l1i = (struct cpuinfo_cache) {
Marat Dukhan	6d99605	2017-05-08 11:31:57 +0000	[diff] [blame]	236	.size = 32 * 1024,
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	237	.associativity = 4,
				238	.line_size = 64
				239	};
				240	*l1d = (struct cpuinfo_cache) {
Marat Dukhan	6d99605	2017-05-08 11:31:57 +0000	[diff] [blame]	241	.size = 32 * 1024,
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	242	.associativity = 4,
				243	.line_size = 64
				244	};
				245	*l2 = (struct cpuinfo_cache) {
Marat Dukhan	6d99605	2017-05-08 11:31:57 +0000	[diff] [blame]	246	.size = 256 * 1024,
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	247	.associativity = 8,
				248	.line_size = 64
				249	};
				250	break;
				251	case cpuinfo_uarch_cortex_a9:
				252	/*
				253	* ARM Cortex‑A9 Technical Reference Manual:
				254	* 7.1.1 Memory system
				255	* The Cortex‑A9 processor has separate instruction and data caches.
				256	* The caches have the following features:
				257	* - Both caches are 4-way set-associative.
				258	* - The cache line length is eight words.
				259	* - You can configure the instruction and data caches independently during implementation
				260	* to sizes of 16KB, 32KB, or 64KB.
				261	* 8.1.5 Exclusive L2 cache
				262	* The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode.
				263	* This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller.
				264	*
				265	* +--------------------+-------+-----------+-----------+-----------+-----------+
				266	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				267	* +--------------------+-------+-----------+-----------+-----------+-----------+
				268	* \| Exynos 4 Dual 4210 \| 2 \| 32K \| 32K \| 1M \| [1] \|
				269	* \| Exynos 4 Dual 4212 \| 2 \| 32K \| 32K \| 1M \| [2] \|
				270	* \| Exynos 4 Quad 4412 \| 4 \| 32K \| 32K \| 1M \| [3] \|
				271	* \| Exynos 4 Quad 4415 \| 4 \| 32K \| 32K \| 1M \| \|
				272	* +--------------------+-------+-----------+-----------+-----------+-----------+
				273	*
				274	* [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf
				275	* [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf
				276	* [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf
				277	*/
				278
				279	/* Use Exynos 4 specs */
				280	*l1i = (struct cpuinfo_cache) {
				281	.size = 32 * 1024,
				282	.associativity = 4,
				283	.line_size = 32
				284	};
				285	*l1d = (struct cpuinfo_cache) {
				286	.size = 32 * 1024,
				287	.associativity = 4,
				288	.line_size = 32
				289	};
				290	*l2 = (struct cpuinfo_cache) {
				291	.size = 1024 * 1024,
Marat Dukhan	1b5c6ba	2017-05-08 10:35:27 +0000	[diff] [blame]	292	/* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */
				293	.associativity = 16,
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	294	.line_size = 32
				295	};
				296	break;
				297	case cpuinfo_uarch_cortex_a15:
				298	/*
				299	* 6.1. About the L1 memory system
				300	* The L1 memory system consists of separate instruction and data caches.
				301	* The L1 instruction memory system has the following features:
				302	* - 32KB 2-way set-associative instruction cache.
				303	* - Fixed line length of 64 bytes.
				304	* The L1 data memory system has the following features:
				305	* - 32KB 2-way set-associative data cache.
				306	* - Fixed line length of 64 bytes.
				307	* 7.1. About the L2 memory system
				308	* The features of the L2 memory system include:
				309	* - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
				310	* - Fixed line length of 64 bytes.
				311	* - 16-way set-associative cache structure.
				312	*
				313	* +--------------------+-------+-----------+-----------+-----------+-----------+
				314	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				315	* +--------------------+-------+-----------+-----------+-----------+-----------+
				316	* \| Exynos 5 Dual 5250 \| 2 \| 32K \| 32K \| 1M \| [1] \|
				317	* \| Exynos 5 Hexa 5260 \| 2(+4) \| 32K \| 32K \| 1M(+512K) \| [2] \|
				318	* \| Exynos 5 Octa 5410 \| 4(+4) \| 32K \| 32K \| 2M(+512K) \| [3] \|
				319	* \| Exynos 5 Octa 5420 \| 4(+4) \| 32K \| 32K \| 2M(+512K) \| [3] \|
				320	* \| Exynos 5 Octa 5422 \| 4(+4) \| 32K \| 32K \| 2M(+512K) \| [3] \|
				321	* \| Exynos 5 Octa 5430 \| 4(+4) \| 32K \| 32K \| 2M(+512K) \| [3] \|
				322	* \| Exynos 5 Octa 5800 \| 4(+4) \| 32K \| 32K \| 2M(+512K) \| [3] \|
				323	* +--------------------+-------+-----------+-----------+-----------+-----------+
				324	*
				325	* [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf
				326	* [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf
				327	* [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13
				328	*/
				329	*l1i = (struct cpuinfo_cache) {
				330	.size = 32 * 1024,
				331	.associativity = 2,
				332	.line_size = 64
				333	};
				334	*l1d = (struct cpuinfo_cache) {
				335	.size = 32 * 1024,
				336	.associativity = 2,
				337	.line_size = 64
				338	};
				339	*l2 = (struct cpuinfo_cache) {
				340	.size = uarch_cores * 512 * 1024,
				341	.associativity = 16,
				342	.line_size = 64
				343	};
				344	break;
Marat Dukhan	ee705c7	2017-05-08 10:18:03 +0000	[diff] [blame]	345	case cpuinfo_uarch_cortex_a53:
				346	/*
				347	* ARM Cortex-A53 MPCore Processor Technical Reference Manual:
				348	* 6.1. About the L1 memory system
Marat Dukhan	c3035ec	2017-05-08 11:08:49 +0000	[diff] [blame]	349	* The L1 memory system consists of separate instruction and data caches. The implementer configures the
Marat Dukhan	ee705c7	2017-05-08 10:18:03 +0000	[diff] [blame]	350	* instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB.
				351	*
				352	* The L1 Instruction memory system has the following key features:
				353	* - Instruction side cache line length of 64 bytes.
				354	* - 2-way set associative L1 Instruction cache.
				355	*
				356	* The L1 Data memory system has the following features:
				357	* - Data side cache line length of 64 bytes.
				358	* - 4-way set associative L1 Data cache.
				359	*
				360	* 7.1. About the L2 memory system
				361	* The L2 memory system consists of an:
				362	* - Optional tightly-coupled L2 cache that includes:
				363	* - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
				364	* - Fixed line length of 64 bytes.
				365	* - 16-way set-associative cache structure.
				366	*
				367	* +--------------------+-------+-----------+-----------+-----------+-----------+
				368	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				369	* +--------------------+-------+-----------+-----------+-----------+-----------+
				370	* \| Broadcom BCM2837 \| 4 \| 16K \| 16K \| 512K \| [1] \|
Marat Dukhan	4780ba5	2017-06-30 18:43:01 -0700	[diff] [blame^]	371	* \| Snapdragon 835 \| 4(+4) \| 32K+64K \| 32K+64K \| 1M(+2M) \| sysfs \|
Marat Dukhan	ee705c7	2017-05-08 10:18:03 +0000	[diff] [blame]	372	* +--------------------+-------+-----------+-----------+-----------+-----------+
				373	*
				374	* [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766
				375	*/
Marat Dukhan	f625402	2017-06-30 18:07:11 -0700	[diff] [blame]	376	if (cpu_part == 0x800) {
				377	/* Little cores of Snapdragon 835 */
				378	*l1i = (struct cpuinfo_cache) {
				379	.size = 32 * 1024,
				380	.associativity = 2,
				381	.line_size = 64
				382	};
				383	*l1d = (struct cpuinfo_cache) {
				384	.size = 32 * 1024,
				385	.associativity = 4,
				386	.line_size = 64
				387	};
				388	*l2 = (struct cpuinfo_cache) {
				389	.size = uarch_cores * 256 * 1024,
				390	.associativity = 16,
				391	.line_size = 64
				392	};
				393	} else {
				394	/* Standard Cortex-A53 */
				395	*l1i = (struct cpuinfo_cache) {
				396	.size = 16 * 1024,
				397	.associativity = 2,
				398	.line_size = 64
				399	};
				400	*l1d = (struct cpuinfo_cache) {
				401	.size = 16 * 1024,
				402	.associativity = 4,
				403	.line_size = 64
				404	};
				405	*l2 = (struct cpuinfo_cache) {
				406	.size = uarch_cores * 128 * 1024,
				407	.associativity = 16,
				408	.line_size = 64
				409	};
				410	}
Marat Dukhan	ee705c7	2017-05-08 10:18:03 +0000	[diff] [blame]	411	break;
Marat Dukhan	c3035ec	2017-05-08 11:08:49 +0000	[diff] [blame]	412	case cpuinfo_uarch_cortex_a57:
				413	/*
				414	* ARM Cortex-A57 MPCore Processor Technical Reference Manual:
				415	* 6.1. About the L1 memory system
				416	* The L1 memory system consists of separate instruction and data caches.
				417	*
				418	* The L1 instruction memory system has the following features:
				419	* - 48KB 3-way set-associative instruction cache.
				420	* - Fixed line length of 64 bytes.
				421	*
				422	* The L1 data memory system has the following features:
				423	* - 32KB 2-way set-associative data cache.
				424	* - Fixed line length of 64 bytes.
				425	*
				426	* 7.1 About the L2 memory system
				427	* The features of the L2 memory system include:
				428	* - Configurable L2 cache size of 512KB, 1MB, and 2MB.
				429	* - Fixed line length of 64 bytes.
				430	* - 16-way set-associative cache structure.
				431	* - Inclusion property with L1 data caches.
				432	*
				433	* +--------------------+-------+-----------+-----------+-----------+-----------+
				434	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				435	* +--------------------+-------+-----------+-----------+-----------+-----------+
				436	* \| Jetson TX1 \| 4 \| 32K \| 48K \| 2M \| [1] \|
				437	* +--------------------+-------+-----------+-----------+-----------+-----------+
				438	*
				439	* [1] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/
				440	*/
				441	*l1i = (struct cpuinfo_cache) {
				442	.size = 48 * 1024,
				443	.associativity = 3,
				444	.line_size = 64
				445	};
				446	*l1d = (struct cpuinfo_cache) {
				447	.size = 32 * 1024,
				448	.associativity = 2,
				449	.line_size = 64
				450	};
				451	*l2 = (struct cpuinfo_cache) {
				452	.size = uarch_cores * 512 * 1024,
				453	.associativity = 16,
				454	.line_size = 64,
				455	.flags = CPUINFO_CACHE_INCLUSIVE
				456	};
				457	break;
Marat Dukhan	4780ba5	2017-06-30 18:43:01 -0700	[diff] [blame^]	458	case cpuinfo_uarch_cortex_a73:
				459	/*
				460	* ARM Cortex‑A73 MPCore Processor Technical Reference Manual
				461	* 6.1. About the L1 memory system
				462	* The L1 memory system consists of separate instruction and data caches.
				463	* The size of the instruction cache is 64KB.
				464	* The size of the data cache is configurable to either 32KB or 64KB.
				465	*
				466	* The L1 instruction memory system has the following key features:
				467	* - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache.
				468	* - Fixed cache line length of 64 bytes.
				469	*
				470	* The L1 data memory system has the following features:
				471	* - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations)
				472	* and a 16-way set associative PIPT cache (for 64KB configurations).
				473	* - Fixed cache line length of 64 bytes.
				474	*
				475	* 7.1 About the L2 memory system
				476	* The L2 memory system consists of:
				477	* - A tightly-integrated L2 cache with:
				478	* - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
				479	* - A 16-way, set-associative structure.
				480	* - A fixed line length of 64 bytes.
				481	*
				482	* The ARM Cortex A73 - Artemis Unveiled [1]
				483	* "ARM still envisions that most vendors will choose to use configurations of 1 to
				484	* 2MB in consumer products. The L2 cache is inclusive of the L1 cache. "
				485	*
				486	* +---------------------+---------+-----------+-----------+-----------+-----------+
				487	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				488	* +---------------------+---------+-----------+-----------+-----------+-----------+
				489	* \| HiSilicon Kirin 960 \| 4(+4) \| 64K+32K \| 64K+32K \| ? \| [2] \|
				490	* \| MediaTek Helio X30 \| 2(+4+4) \| ? \| ? \| ? \| \|
				491	* \| Snapdragon 835 \| 4(+4) \| 64K+32K \| 64K+32K \| 2M(+1M) \| sysfs \|
				492	* +---------------------+---------+-----------+-----------+-----------+-----------+
				493	*
				494	* [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2
				495	* [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3
				496	*/
				497	*l1i = (struct cpuinfo_cache) {
				498	.size = 64 * 1024,
				499	.associativity = 4,
				500	.line_size = 64
				501	};
				502	*l1d = (struct cpuinfo_cache) {
				503	.size = 64 * 1024,
				504	.associativity = 16,
				505	.line_size = 64
				506	};
				507	*l2 = (struct cpuinfo_cache) {
				508	.size = uarch_cores * 512 * 1024,
				509	.associativity = 16,
				510	.line_size = 64,
				511	.flags = CPUINFO_CACHE_INCLUSIVE
				512	};
				513	break;
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	514	case cpuinfo_uarch_scorpion:
				515	/*
				516	* - "The CPU includes 32KB instruction and data caches as
				517	* well as a complete memory-management unit (MMU) suitable
				518	* for high-level operating systems. The CPU also has
				519	* 256KB of SRAM that can be allocated in 64KB increments
				520	* to level-two (L2) cache or tightly coupled memory (TCM)." [1]
				521	* We interpret it as L2 cache being 4-way set-associative on single-core Scorpion.
				522	* - L1 Data Cache = 32 KB. 32 B/line. [2]
				523	* - L2 Cache = 256 KB. 128 B/line. [2]
				524	* - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3]
				525	* - Single or dual-core configuration [3]
				526	* - For L1 cache assume the same associativity as Krait
				527	*
				528	* [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf
				529	* [2] http://www.7-cpu.com/cpu/Snapdragon.html
				530	* [3] https://en.wikipedia.org/wiki/Scorpion_(CPU)
				531	*/
				532	*l1i = (struct cpuinfo_cache) {
				533	.size = 32 * 1024,
				534	.associativity = 4,
				535	.line_size = 32
				536	};
				537	*l1d = (struct cpuinfo_cache) {
				538	.size = 32 * 1024,
				539	.associativity = 4,
				540	.line_size = 32
				541	};
				542	*l2 = (struct cpuinfo_cache) {
				543	.size = uarch_cores * 256 * 1024,
				544	.associativity = 4,
				545	.line_size = 128
				546	};
				547	break;
				548	case cpuinfo_uarch_krait:
				549	/*
				550	* - L0 Data cache = 4 KB. 64 B/line, direct mapped [1]
				551	* - L0 Instruction cache = 4 KB. [1]
				552	* - L1 Data cache = 16 KB. 64 B/line, 4-way [1]
				553	* - L1 Instruction cache = 16 KB, 4-way [1]
				554	* - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1]
				555	* - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2]
				556	*
				557	* [1] http://www.7-cpu.com/cpu/Krait.html
				558	* [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2
				559	*/
				560	*l1i = (struct cpuinfo_cache) {
				561	.size = 16 * 1024,
				562	.associativity = 4,
				563	.line_size = 64 /* assume same as L1D */
				564	};
				565	*l1d = (struct cpuinfo_cache) {
				566	.size = 16 * 1024,
				567	.associativity = 4,
				568	.line_size = 64
				569	};
				570	*l2 = (struct cpuinfo_cache) {
				571	.size = uarch_cores * 512 * 1024,
				572	.associativity = 8,
				573	.line_size = 128
				574	};
				575	break;
				576	case cpuinfo_uarch_kryo:
				577	/*
				578	* +-----------------+-------+-----------+-----------+-----------+-----------+
				579	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				580	* +-----------------+-------+-----------+-----------+-----------+-----------+
				581	* \| Snapdragon 820 \| 2+2 \| 32K \| 32K \| 1M+512K \| [1] \|
				582	* \| Snapdragon 821 \| 2+2 \| 32K \| 32K \| 1M+512K \| [1] \|
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	583	* +-----------------+-------+-----------+-----------+-----------+-----------+
				584	*
				585	* [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2
				586	*/
				587	*l1i = (struct cpuinfo_cache) {
				588	.size = 32 * 1024,
				589	.associativity = 4 /* assume same as Krait */,
				590	.line_size = 64 /* assume same as Krait */
				591	};
				592	*l1d = (struct cpuinfo_cache) {
				593	.size = 32 * 1024,
				594	.associativity = 4 /* assume same as Krait */,
				595	.line_size = 64 /* assume same as Krait */
				596	};
Marat Dukhan	f625402	2017-06-30 18:07:11 -0700	[diff] [blame]	597	if (cpu_part == 0x205) {
				598	/* Kryo "Silver" */
				599	*l2 = (struct cpuinfo_cache) {
				600	.size = uarch_cores * 256 * 1024,
				601	.associativity = 8 /* assume same as Krait */
				602	.line_size = 64 /* assume same as Krait */
				603	};
				604	} else {
				605	/* Kryo "Gold" */
				606	*l2 = (struct cpuinfo_cache) {
				607	.size = uarch_cores * 512 * 1024,
				608	.associativity = 8 /* assume same as Krait */
				609	.line_size = 64 /* assume same as Krait */
				610	};
				611	}
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	612	break;
Marat Dukhan	ac57632	2017-05-08 13:08:25 +0000	[diff] [blame]	613	case cpuinfo_uarch_denver:
				614	/*
				615	* The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 2 data cache,
				616	* and a 2MB, 16-way level 2 cache, all of which can service both cores. [1]
				617	*
				618	* All the caches have 64-byte lines. [2]
				619	*
				620	* [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html
				621	* [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014
				622	*/
				623	*l1i = (struct cpuinfo_cache) {
				624	.size = 128 * 1024,
				625	.associativity = 4,
				626	.line_size = 64
				627	};
				628	*l1d = (struct cpuinfo_cache) {
				629	.size = 64 * 1024,
				630	.associativity = 4,
				631	.line_size = 64
				632	};
				633	*l2 = (struct cpuinfo_cache) {
				634	.size = 2 * 1024 * 1024,
				635	.associativity = 16,
				636	.line_size = 64
				637	};
				638	break;
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	639	case cpuinfo_uarch_mongoose:
				640	/*
				641	* - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
				642	* namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
				643	* - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1]
				644	* - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split
				645	* into 4 banks and has a 22 cycle latency" [1]
				646	*
				647	* +--------------------+-------+-----------+-----------+-----------+-----------+
				648	* \| Processor model \| Cores \| L1D cache \| L1I cache \| L2 cache \| Reference \|
				649	* +--------------------+-------+-----------+-----------+-----------+-----------+
				650	* \| Exynos 8 Octa 8890 \| 4(+4) \| 64K \| 32K \| 2M \| [1] \|
				651	* \| Exynos 8 Octa 8895 \| 4(+4) \| 64K \| 32K \| 2M \| [2] \|
				652	* +--------------------+-------+-----------+-----------+-----------+-----------+
				653	*
				654	* [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed
				655	* [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market
				656	*/
				657	*l1i = (struct cpuinfo_cache) {
				658	.size = 64 * 1024,
				659	.associativity = 4,
				660	.line_size = 128
				661	};
				662	*l1d = (struct cpuinfo_cache) {
				663	.size = 32 * 1024,
				664	.associativity = 8,
				665	.line_size = 64
				666	};
				667	*l2 = (struct cpuinfo_cache) {
				668	.size = 2 * 1024 * 1024,
				669	.associativity = 16,
				670	.line_size = 64
				671	};
				672	break;
Marat Dukhan	92dae31	2017-05-09 14:10:17 +0000	[diff] [blame]	673	case cpuinfo_uarch_thunderx:
				674	/*
				675	* "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1]
				676	*
				677	* [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf
				678	*/
				679	*l1i = (struct cpuinfo_cache) {
				680	.size = 78 * 1024,
				681	.associativity = 4 /* assumption */,
				682	.line_size = 64 /* assumption */
				683	};
				684	*l1d = (struct cpuinfo_cache) {
				685	.size = 32 * 1024,
				686	.associativity = 4 /* assumption */,
				687	.line_size = 64 /* assumption */
				688	};
				689	*l2 = (struct cpuinfo_cache) {
				690	.size = 16 * 1024 * 1024,
				691	.associativity = 8 /* assumption */,
				692	.line_size = 64 /* assumption */
				693	};
				694	break;
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	695	case cpuinfo_uarch_cortex_a12:
				696	case cpuinfo_uarch_cortex_a17:
				697	case cpuinfo_uarch_cortex_a32:
				698	case cpuinfo_uarch_cortex_a35:
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	699	case cpuinfo_uarch_cortex_a72:
				700	case cpuinfo_uarch_cortex_a73:
				701	default:
				702	cpuinfo_log_warning("target uarch not recognized; using generic cache parameters");
				703	/* Follow OpenBLAS */
				704	if (arch_version >= 8) {
				705	*l1i = (struct cpuinfo_cache) {
				706	.size = 32 * 1024,
				707	.associativity = 4,
				708	.line_size = 64
				709	};
				710	*l1d = (struct cpuinfo_cache) {
				711	.size = 32 * 1024,
				712	.associativity = 4,
				713	.line_size = 64
				714	};
				715	*l2 = (struct cpuinfo_cache) {
				716	.size = uarch_cores * 256 * 1024,
				717	.associativity = 8,
				718	.line_size = 64
				719	};
				720	} else {
				721	*l1i = (struct cpuinfo_cache) {
				722	.size = 16 * 1024,
				723	.associativity = 4,
				724	.line_size = 32
				725	};
				726	*l1d = (struct cpuinfo_cache) {
				727	.size = 16 * 1024,
				728	.associativity = 4,
				729	.line_size = 32
				730	};
				731	if (arch_version >= 7) {
				732	*l2 = (struct cpuinfo_cache) {
				733	.size = uarch_cores * 128 * 1024,
				734	.associativity = 8,
				735	.line_size = 32
				736	};
				737	}
				738	}
				739	break;
				740	}
				741	l1i->sets = l1i->size / (l1i->associativity * l1i->line_size);
				742	l1i->partitions = 1;
				743	l1d->sets = l1d->size / (l1d->associativity * l1d->line_size);
				744	l1d->partitions = 1;
				745	if (l2->size != 0) {
Marat Dukhan	8ecad1a	2017-05-08 07:21:57 +0000	[diff] [blame]	746	l2->sets = l2->size / (l2->associativity * l2->line_size);
Marat Dukhan	3c98276	2017-05-08 06:16:45 +0000	[diff] [blame]	747	l2->partitions = 1;
				748	}
				749	}