blob: 0225a8ce62bb6655f45069aaaed26baa40003a34 [file] [log] [blame]
Marat Dukhan3c982762017-05-08 06:16:45 +00001#include <stdint.h>
2
3#include <cpuinfo.h>
4#include <log.h>
5#include <arm/api.h>
Marat Dukhana8fb3dd2017-08-09 13:49:39 -07006#include <arm/midr.h>
Marat Dukhan3c982762017-05-08 06:16:45 +00007
8
9void cpuinfo_arm_decode_cache(
10 enum cpuinfo_uarch uarch,
Marat Dukhana8fb3dd2017-08-09 13:49:39 -070011 uint32_t cluster_cores,
12 uint32_t midr,
Marat Dukhan54a5b4d2017-08-25 12:24:57 -070013#ifdef __ANDROID__
14 const struct cpuinfo_arm_chipset chipset[restrict static 1],
15#endif
Marat Dukhan3c982762017-05-08 06:16:45 +000016 uint32_t arch_version,
17 struct cpuinfo_cache l1i[restrict static 1],
18 struct cpuinfo_cache l1d[restrict static 1],
19 struct cpuinfo_cache l2[restrict static 1])
20{
21 switch (uarch) {
22 case cpuinfo_uarch_xscale:
Marat Dukhana8fb3dd2017-08-09 13:49:39 -070023 switch (midr_get_part(midr) >> 8) {
Marat Dukhan3c982762017-05-08 06:16:45 +000024 case 2:
25 /*
26 * PXA 210/25X/26X
27 *
28 * See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface"
29 * by David A. Patterson, John L. Hennessy
30 */
31 *l1i = (struct cpuinfo_cache) {
32 .size = 16 * 1024,
33 .associativity = 32,
34 .line_size = 32
35 };
36 *l1d = (struct cpuinfo_cache) {
37 .size = 16 * 1024,
38 .associativity = 4,
39 .line_size = 64
40 };
41 break;
42 case 4:
43 /* PXA 27X */
44 *l1i = (struct cpuinfo_cache) {
45 .size = 32 * 1024,
46 .associativity = 32,
47 .line_size = 32
48 };
49 *l1d = (struct cpuinfo_cache) {
50 .size = 32 * 1024,
51 .associativity = 32,
52 .line_size = 32
53 };
54 break;
55 case 6:
56 /*
57 * PXA 3XX
58 *
59 * See http://download.intel.com/design/intelxscale/31628302.pdf
60 */
61 *l1i = (struct cpuinfo_cache) {
62 .size = 32 * 1024,
63 .associativity = 4,
64 .line_size = 32
65 };
66 *l1d = (struct cpuinfo_cache) {
67 .size = 32 * 1024,
68 .associativity = 4,
69 .line_size = 32
70 };
71 *l2 = (struct cpuinfo_cache) {
72 .size = 256 * 1024,
73 .associativity = 8,
74 .line_size = 32
75 };
76 break;
77 }
78 break;
79 case cpuinfo_uarch_arm11:
80 *l1i = (struct cpuinfo_cache) {
81 .size = 16 * 1024,
82 .associativity = 4,
83 .line_size = 32
84 };
85 *l1d = (struct cpuinfo_cache) {
86 .size = 16 * 1024,
87 .associativity = 4,
88 .line_size = 32
89 };
90 break;
91 case cpuinfo_uarch_cortex_a5:
92 /*
93 * Cortex-A5 Technical Reference Manual:
94 * 7.1.1. Memory system
95 * The Cortex-A5 processor has separate instruction and data caches.
96 * The caches have the following features:
97 * - Data cache is 4-way set-associative.
98 * - Instruction cache is 2-way set-associative.
99 * - The cache line length is eight words.
100 * - You can configure the instruction and data caches independently during implementation
101 * to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB.
102 * 1.1.3. System design components
103 * PrimeCell Level 2 Cache Controller (PL310)
104 * The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a
105 * recognized method of improving the performance of ARM-based systems when significant memory traffic
106 * is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external
107 * memory accesses and has been optimized for use with the Cortex-A5 processor.
108 * 8.1.7. Exclusive L2 cache
109 * The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
110 * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
111 *
112 * +--------------------+-----------+-----------+----------+-----------+
113 * | Processor model | L1D cache | L1I cache | L2 cache | Reference |
114 * +--------------------+-----------+-----------+----------+-----------+
115 * | Qualcomm MSM7225A | | | | |
116 * | Qualcomm MSM7625A | | | | |
117 * | Qualcomm MSM7227A | | | | |
118 * | Qualcomm MSM7627A | 32K | 32K | 256K | Wiki [1] |
119 * | Qualcomm MSM7225AB | | | | |
120 * | Qualcomm MSM7225AB | | | | |
121 * | Qualcomm QSD8250 | | | | |
122 * | Qualcomm QSD8650 | | | | |
123 * +--------------------+-----------+-----------+----------+-----------+
124 * | Spreadtrum SC6821 | 32K | 32K | ? | |
125 * | Spreadtrum SC6825 | 32K | 32K | 256K | Wiki [2] |
126 * | Spreadtrum SC8810 | ? | ? | ? | |
127 * | Spreadtrum SC8825 | 32K | 32K | ? | |
128 * +--------------------+-----------+-----------+----------+-----------+
129 *
130 * [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1
131 * [2] https://en.wikipedia.org/wiki/Spreadtrum
132 */
133 *l1i = (struct cpuinfo_cache) {
134 .size = 32 * 1024,
135 .associativity = 2,
136 .line_size = 32
137 };
138 *l1d = (struct cpuinfo_cache) {
139 .size = 32 * 1024,
140 .associativity = 4,
141 .line_size = 32
142 };
143 *l2 = (struct cpuinfo_cache) {
144 .size = 256 * 1024,
145 /*
146 * Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size"
147 * Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf
148 */
149 .associativity = 8,
150 .line_size = 32
151 };
152 break;
153 case cpuinfo_uarch_cortex_a7:
154 /*
155 * Cortex-A7 MPCore Technical Reference Manual:
156 * 6.1. About the L1 memory system
157 * The L1 memory system consists of separate instruction and data caches. You can configure the
158 * instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
159 *
160 * The L1 instruction memory system has the following features:
161 * - Instruction side cache line length of 32-bytes.
162 * - 2-way set-associative instruction cache.
163 *
164 * The L1 data memory system has the following features:
165 * - Data side cache line length of 64-bytes.
166 * - 4-way set-associative data cache.
167 *
168 * 7.1. About the L2 Memory system
169 * The L2 memory system consists of an:
170 * - Optional tightly-coupled L2 cache that includes:
171 * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
172 *
173 * +--------------------+-------+-----------+-----------+-----------+-----------+
174 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
175 * +--------------------+-------+-----------+-----------+-----------+-----------+
176 * | Allwinner A20 | 2 | 32K | 32K | 256K | [1] |
177 * | Allwinner A23 | 2 | 32K | 32K | 256K | [2] |
178 * | Allwinner A31 | 4 | 32K | 32K | 1M | [3] |
179 * | Allwinner A31s | 4 | 32K | 32K | 1M | [4] |
180 * | Allwinner A33 | 4 | 32K | 32K | 512K | [5] |
181 * | Allwinner A80 Octa | 4(+4) | 32K | 32K | 512K(+2M) | [6] |
182 * | Allwinner A81T | 8 | 32K | 32K | 1M | [7] |
183 * +--------------------+-------+-----------+-----------+-----------+-----------+
184 * | Broadcom BCM2836 | 4 | 32K | 32K | 512K | [8] |
185 * +--------------------+-------+-----------+-----------+-----------+-----------+
186 *
187 * [1] https://linux-sunxi.org/A20
188 * [2] https://linux-sunxi.org/A23
189 * [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
190 * [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf
191 * [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf
192 * [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf
193 * [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
194 * [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428
195 */
196 *l1i = (struct cpuinfo_cache) {
197 .size = 32 * 1024,
198 .associativity = 2,
199 .line_size = 32
200 };
201 *l1d = (struct cpuinfo_cache) {
202 .size = 32 * 1024,
203 .associativity = 4,
204 .line_size = 64
205 };
206 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700207 .size = 128 * 1024 * cluster_cores,
Marat Dukhan3c982762017-05-08 06:16:45 +0000208 .associativity = 8,
209 .line_size = 64
210 };
211 break;
212 case cpuinfo_uarch_cortex_a8:
213 /*
214 * Cortex-A8 Technical Reference Manual:
215 * 7.1. About the L1 memory system
216 * The L1 memory system consists of separate instruction and data caches in a Harvard arrangement.
217 * The L1 memory system provides the core with:
218 * - fixed line length of 64 bytes
219 * - support for 16KB or 32KB caches
220 * - 4-way set associative cache structure
221 * 8.1. About the L2 memory system
222 * The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache.
223 * The key features of the L2 memory system include:
224 * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
225 * - fixed line length of 64 bytes
226 * - 8-way set associative cache structure
Marat Dukhan6d996052017-05-08 11:31:57 +0000227 *
Marat Dukhan419a8192017-05-08 12:25:17 +0000228 * +----------------------+-----------+-----------+-----------+-----------+
229 * | Processor model | L1D cache | L1I cache | L2 cache | Reference |
230 * +----------------------+-----------+-----------+-----------+-----------+
231 * | Exynos 3 Single 3110 | 32K | 32K | 512K | [1] |
232 * +----------------------+-----------+-----------+-----------+-----------+
233 * | TI DM 3730 | 32K | 32K | 256K | [2] |
234 * +----------------------+-----------+-----------+-----------+-----------+
Marat Dukhan6d996052017-05-08 11:31:57 +0000235 *
Marat Dukhan419a8192017-05-08 12:25:17 +0000236 * [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf
237 * [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf
Marat Dukhan3c982762017-05-08 06:16:45 +0000238 */
239 *l1i = (struct cpuinfo_cache) {
Marat Dukhan6d996052017-05-08 11:31:57 +0000240 .size = 32 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000241 .associativity = 4,
242 .line_size = 64
243 };
244 *l1d = (struct cpuinfo_cache) {
Marat Dukhan6d996052017-05-08 11:31:57 +0000245 .size = 32 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000246 .associativity = 4,
247 .line_size = 64
248 };
249 *l2 = (struct cpuinfo_cache) {
Marat Dukhan6d996052017-05-08 11:31:57 +0000250 .size = 256 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000251 .associativity = 8,
252 .line_size = 64
253 };
254 break;
255 case cpuinfo_uarch_cortex_a9:
256 /*
257 * ARM Cortex‑A9 Technical Reference Manual:
258 * 7.1.1 Memory system
259 * The Cortex‑A9 processor has separate instruction and data caches.
260 * The caches have the following features:
261 * - Both caches are 4-way set-associative.
262 * - The cache line length is eight words.
263 * - You can configure the instruction and data caches independently during implementation
264 * to sizes of 16KB, 32KB, or 64KB.
265 * 8.1.5 Exclusive L2 cache
266 * The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode.
267 * This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller.
268 *
269 * +--------------------+-------+-----------+-----------+-----------+-----------+
270 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
271 * +--------------------+-------+-----------+-----------+-----------+-----------+
272 * | Exynos 4 Dual 4210 | 2 | 32K | 32K | 1M | [1] |
273 * | Exynos 4 Dual 4212 | 2 | 32K | 32K | 1M | [2] |
274 * | Exynos 4 Quad 4412 | 4 | 32K | 32K | 1M | [3] |
275 * | Exynos 4 Quad 4415 | 4 | 32K | 32K | 1M | |
276 * +--------------------+-------+-----------+-----------+-----------+-----------+
277 *
278 * [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf
279 * [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf
280 * [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf
281 */
282
283 /* Use Exynos 4 specs */
284 *l1i = (struct cpuinfo_cache) {
285 .size = 32 * 1024,
286 .associativity = 4,
287 .line_size = 32
288 };
289 *l1d = (struct cpuinfo_cache) {
290 .size = 32 * 1024,
291 .associativity = 4,
292 .line_size = 32
293 };
294 *l2 = (struct cpuinfo_cache) {
295 .size = 1024 * 1024,
Marat Dukhan1b5c6ba2017-05-08 10:35:27 +0000296 /* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */
297 .associativity = 16,
Marat Dukhan3c982762017-05-08 06:16:45 +0000298 .line_size = 32
299 };
300 break;
301 case cpuinfo_uarch_cortex_a15:
302 /*
303 * 6.1. About the L1 memory system
304 * The L1 memory system consists of separate instruction and data caches.
305 * The L1 instruction memory system has the following features:
306 * - 32KB 2-way set-associative instruction cache.
307 * - Fixed line length of 64 bytes.
308 * The L1 data memory system has the following features:
309 * - 32KB 2-way set-associative data cache.
310 * - Fixed line length of 64 bytes.
311 * 7.1. About the L2 memory system
312 * The features of the L2 memory system include:
313 * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
314 * - Fixed line length of 64 bytes.
315 * - 16-way set-associative cache structure.
316 *
317 * +--------------------+-------+-----------+-----------+-----------+-----------+
318 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
319 * +--------------------+-------+-----------+-----------+-----------+-----------+
320 * | Exynos 5 Dual 5250 | 2 | 32K | 32K | 1M | [1] |
321 * | Exynos 5 Hexa 5260 | 2(+4) | 32K | 32K | 1M(+512K) | [2] |
322 * | Exynos 5 Octa 5410 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
323 * | Exynos 5 Octa 5420 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
324 * | Exynos 5 Octa 5422 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
325 * | Exynos 5 Octa 5430 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
326 * | Exynos 5 Octa 5800 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
327 * +--------------------+-------+-----------+-----------+-----------+-----------+
328 *
329 * [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf
330 * [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf
331 * [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13
332 */
333 *l1i = (struct cpuinfo_cache) {
334 .size = 32 * 1024,
335 .associativity = 2,
336 .line_size = 64
337 };
338 *l1d = (struct cpuinfo_cache) {
339 .size = 32 * 1024,
340 .associativity = 2,
341 .line_size = 64
342 };
343 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700344 .size = cluster_cores * 512 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000345 .associativity = 16,
346 .line_size = 64
347 };
348 break;
Marat Dukhanee705c72017-05-08 10:18:03 +0000349 case cpuinfo_uarch_cortex_a53:
350 /*
351 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
352 * 6.1. About the L1 memory system
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000353 * The L1 memory system consists of separate instruction and data caches. The implementer configures the
Marat Dukhanee705c72017-05-08 10:18:03 +0000354 * instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB.
355 *
356 * The L1 Instruction memory system has the following key features:
357 * - Instruction side cache line length of 64 bytes.
358 * - 2-way set associative L1 Instruction cache.
359 *
360 * The L1 Data memory system has the following features:
361 * - Data side cache line length of 64 bytes.
362 * - 4-way set associative L1 Data cache.
363 *
364 * 7.1. About the L2 memory system
365 * The L2 memory system consists of an:
366 * - Optional tightly-coupled L2 cache that includes:
367 * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
368 * - Fixed line length of 64 bytes.
369 * - 16-way set-associative cache structure.
370 *
371 * +--------------------+-------+-----------+-----------+-----------+-----------+
372 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
373 * +--------------------+-------+-----------+-----------+-----------+-----------+
374 * | Broadcom BCM2837 | 4 | 16K | 16K | 512K | [1] |
Marat Dukhanc11876d2017-08-10 10:38:01 -0700375 * | Exynos 7420 | 4(+4) | 32K | 32K | 256K | [2, 3] |
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700376 * | Exynos 8890 | 4(+4) | 32K | 32K | 256K | [4] |
Marat Dukhanc11876d2017-08-10 10:38:01 -0700377 * | Snapdragon 410 | 4 | 32K | 32K | 512K | [3] |
Marat Dukhan4780ba52017-06-30 18:43:01 -0700378 * | Snapdragon 835 | 4(+4) | 32K+64K | 32K+64K | 1M(+2M) | sysfs |
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700379 * | Kirin 620 | 4+4 | 32K | 32K | 512K | [5] |
Marat Dukhanee705c72017-05-08 10:18:03 +0000380 * +--------------------+-------+-----------+-----------+-----------+-----------+
381 *
382 * [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766
Marat Dukhanc11876d2017-08-10 10:38:01 -0700383 * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2
384 * [3] https://www.usenix.org/system/files/conference/usenixsecurity16/sec16_paper_lipp.pdf
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700385 * [4] http://www.boardset.com/products/products_v8890.php
386 * [5] http://mirror.lemaker.org/Hi6220V100_Multi-Mode_Application_Processor_Function_Description.pdf
Marat Dukhanee705c72017-05-08 10:18:03 +0000387 */
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700388 if (midr_is_kryo280_silver(midr)) {
Marat Dukhanf6254022017-06-30 18:07:11 -0700389 /* Little cores of Snapdragon 835 */
390 *l1i = (struct cpuinfo_cache) {
391 .size = 32 * 1024,
392 .associativity = 2,
393 .line_size = 64
394 };
395 *l1d = (struct cpuinfo_cache) {
396 .size = 32 * 1024,
397 .associativity = 4,
398 .line_size = 64
399 };
400 *l2 = (struct cpuinfo_cache) {
Marat Dukhan3014efb2017-08-25 17:25:55 -0700401 .size = 1024 * 1024,
Marat Dukhanf6254022017-06-30 18:07:11 -0700402 .associativity = 16,
403 .line_size = 64
404 };
405 } else {
406 /* Standard Cortex-A53 */
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700407
408 /* Use conservative values by default */
409 size_t l1_size = 16 * 1024;
410 size_t l2_size = 256 * 1024;
411#ifdef __ANDROID__
412 switch (chipset->vendor) {
413 case cpuinfo_arm_chipset_vendor_qualcomm:
414 case cpuinfo_arm_chipset_vendor_hisilicon:
415 l1_size = 32 * 1024;
416 l2_size = 512 * 1024;
417 break;
418 case cpuinfo_arm_chipset_vendor_samsung:
419 l1_size = 32 * 1024;
420 break;
421 default:
422 /* Silence compiler warning about unhandled enum values */
423 break;
424 }
425#endif
426
Marat Dukhanf6254022017-06-30 18:07:11 -0700427 *l1i = (struct cpuinfo_cache) {
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700428 .size = l1_size,
Marat Dukhanf6254022017-06-30 18:07:11 -0700429 .associativity = 2,
430 .line_size = 64
431 };
432 *l1d = (struct cpuinfo_cache) {
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700433 .size = l1_size,
Marat Dukhanf6254022017-06-30 18:07:11 -0700434 .associativity = 4,
435 .line_size = 64
436 };
437 *l2 = (struct cpuinfo_cache) {
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700438 .size = l2_size,
Marat Dukhanf6254022017-06-30 18:07:11 -0700439 .associativity = 16,
440 .line_size = 64
441 };
442 }
Marat Dukhanee705c72017-05-08 10:18:03 +0000443 break;
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000444 case cpuinfo_uarch_cortex_a57:
445 /*
446 * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
447 * 6.1. About the L1 memory system
448 * The L1 memory system consists of separate instruction and data caches.
449 *
450 * The L1 instruction memory system has the following features:
451 * - 48KB 3-way set-associative instruction cache.
452 * - Fixed line length of 64 bytes.
453 *
454 * The L1 data memory system has the following features:
455 * - 32KB 2-way set-associative data cache.
456 * - Fixed line length of 64 bytes.
457 *
458 * 7.1 About the L2 memory system
459 * The features of the L2 memory system include:
460 * - Configurable L2 cache size of 512KB, 1MB, and 2MB.
461 * - Fixed line length of 64 bytes.
462 * - 16-way set-associative cache structure.
463 * - Inclusion property with L1 data caches.
464 *
465 * +--------------------+-------+-----------+-----------+-----------+-----------+
466 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
467 * +--------------------+-------+-----------+-----------+-----------+-----------+
Marat Dukhanc11876d2017-08-10 10:38:01 -0700468 * | Snapdragon 810 | 4(+4) | 32K | 48K | 2M | [1] |
469 * | Exynos 7420 | 4(+4) | 32K | 48K | 2M | [2] |
470 * | Jetson TX1 | 4 | 32K | 48K | 2M | [3] |
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000471 * +--------------------+-------+-----------+-----------+-----------+-----------+
472 *
Marat Dukhanc11876d2017-08-10 10:38:01 -0700473 * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview
474 * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2
475 * [3] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000476 */
477 *l1i = (struct cpuinfo_cache) {
478 .size = 48 * 1024,
479 .associativity = 3,
480 .line_size = 64
481 };
482 *l1d = (struct cpuinfo_cache) {
483 .size = 32 * 1024,
484 .associativity = 2,
485 .line_size = 64
486 };
487 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700488 .size = cluster_cores * 512 * 1024,
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000489 .associativity = 16,
490 .line_size = 64,
491 .flags = CPUINFO_CACHE_INCLUSIVE
492 };
493 break;
Marat Dukhanf822e712017-06-30 20:22:23 -0700494 case cpuinfo_uarch_cortex_a72:
495 /*
496 * ARM® Cortex-A72 MPCore Processor Technical Reference Manual
497 * 6.1. About the L1 memory system
498 * The L1 memory system consists of separate instruction and data caches.
499 *
500 * The L1 instruction memory system has the following features:
501 * - 48KB 3-way set-associative instruction cache.
502 * - Fixed line length of 64 bytes.
503 *
504 * The L1 data memory system has the following features:
505 * - 32KB 2-way set-associative data cache.
506 * - Fixed cache line length of 64 bytes.
507 *
508 * 7.1 About the L2 memory system
509 * The features of the L2 memory system include:
510 * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
511 * - Fixed line length of 64 bytes.
512 * - Banked pipeline structures.
513 * - Inclusion property with L1 data caches.
514 * - 16-way set-associative cache structure.
515 *
Marat Dukhanf822e712017-06-30 20:22:23 -0700516 * +---------------------+---------+-----------+-----------+------------+-----------+
517 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
518 * +---------------------+---------+-----------+-----------+------------+-----------+
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700519 * | Snapdragon 650 | 2(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [1] |
520 * | Snapdragon 652 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [2] |
521 * | Snapdragon 653 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [3] |
522 * | HiSilicon Kirin 950 | 4(+4) | 32K+32K | 48K+32K | ? | |
523 * | HiSilicon Kirin 955 | 4(+4) | 32K+32K | 48K+32K | ? | |
Marat Dukhanf822e712017-06-30 20:22:23 -0700524 * | MediaTek Helio X20 | 2(+4+4) | ? | ? | ? | |
525 * | MediaTek Helio X23 | 2(+4+4) | ? | ? | ? | |
526 * | MediaTek Helio X25 | 2(+4+4) | ? | ? | ? | |
527 * | MediaTek Helio X27 | 2(+4+4) | ? | ? | ? | |
528 * +---------------------+---------+-----------+-----------+------------+-----------+
529 *
530 * [1] http://pdadb.net/index.php?m=processor&id=578&c=qualcomm_snapdragon_618_msm8956__snapdragon_650
531 * [2] http://pdadb.net/index.php?m=processor&id=667&c=qualcomm_snapdragon_620_apq8076__snapdragon_652
532 * [3] http://pdadb.net/index.php?m=processor&id=692&c=qualcomm_snapdragon_653_msm8976sg__msm8976_pro
533 */
534 *l1i = (struct cpuinfo_cache) {
535 .size = 48 * 1024,
536 .associativity = 3,
537 .line_size = 64
538 };
539 *l1d = (struct cpuinfo_cache) {
540 .size = 32 * 1024,
541 .associativity = 2,
542 .line_size = 64
543 };
544 *l2 = (struct cpuinfo_cache) {
545 .size = 1024 * 1024,
546 .associativity = 16,
547 .line_size = 64,
548 .flags = CPUINFO_CACHE_INCLUSIVE
549 };
550 break;
Marat Dukhan4780ba52017-06-30 18:43:01 -0700551 case cpuinfo_uarch_cortex_a73:
552 /*
553 * ARM Cortex‑A73 MPCore Processor Technical Reference Manual
554 * 6.1. About the L1 memory system
555 * The L1 memory system consists of separate instruction and data caches.
556 * The size of the instruction cache is 64KB.
557 * The size of the data cache is configurable to either 32KB or 64KB.
558 *
559 * The L1 instruction memory system has the following key features:
560 * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache.
561 * - Fixed cache line length of 64 bytes.
562 *
563 * The L1 data memory system has the following features:
564 * - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations)
565 * and a 16-way set associative PIPT cache (for 64KB configurations).
566 * - Fixed cache line length of 64 bytes.
567 *
568 * 7.1 About the L2 memory system
569 * The L2 memory system consists of:
570 * - A tightly-integrated L2 cache with:
571 * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
572 * - A 16-way, set-associative structure.
573 * - A fixed line length of 64 bytes.
574 *
575 * The ARM Cortex A73 - Artemis Unveiled [1]
576 * "ARM still envisions that most vendors will choose to use configurations of 1 to
577 * 2MB in consumer products. The L2 cache is inclusive of the L1 cache. "
578 *
579 * +---------------------+---------+-----------+-----------+-----------+-----------+
580 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
581 * +---------------------+---------+-----------+-----------+-----------+-----------+
582 * | HiSilicon Kirin 960 | 4(+4) | 64K+32K | 64K+32K | ? | [2] |
Marat Dukhanf822e712017-06-30 20:22:23 -0700583 * | MediaTek Helio X30 | 2(+4+4) | ? | 64K+ ? | ? | |
Marat Dukhan4780ba52017-06-30 18:43:01 -0700584 * | Snapdragon 835 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | sysfs |
585 * +---------------------+---------+-----------+-----------+-----------+-----------+
586 *
587 * [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2
588 * [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3
589 */
590 *l1i = (struct cpuinfo_cache) {
591 .size = 64 * 1024,
592 .associativity = 4,
593 .line_size = 64
594 };
595 *l1d = (struct cpuinfo_cache) {
596 .size = 64 * 1024,
597 .associativity = 16,
598 .line_size = 64
599 };
600 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700601 .size = cluster_cores * 512 * 1024,
Marat Dukhan4780ba52017-06-30 18:43:01 -0700602 .associativity = 16,
603 .line_size = 64,
604 .flags = CPUINFO_CACHE_INCLUSIVE
605 };
606 break;
Marat Dukhan3c982762017-05-08 06:16:45 +0000607 case cpuinfo_uarch_scorpion:
608 /*
609 * - "The CPU includes 32KB instruction and data caches as
610 * well as a complete memory-management unit (MMU) suitable
611 * for high-level operating systems. The CPU also has
612 * 256KB of SRAM that can be allocated in 64KB increments
613 * to level-two (L2) cache or tightly coupled memory (TCM)." [1]
614 * We interpret it as L2 cache being 4-way set-associative on single-core Scorpion.
615 * - L1 Data Cache = 32 KB. 32 B/line. [2]
616 * - L2 Cache = 256 KB. 128 B/line. [2]
617 * - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3]
618 * - Single or dual-core configuration [3]
619 * - For L1 cache assume the same associativity as Krait
620 *
621 * [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf
622 * [2] http://www.7-cpu.com/cpu/Snapdragon.html
623 * [3] https://en.wikipedia.org/wiki/Scorpion_(CPU)
624 */
625 *l1i = (struct cpuinfo_cache) {
626 .size = 32 * 1024,
627 .associativity = 4,
628 .line_size = 32
629 };
630 *l1d = (struct cpuinfo_cache) {
631 .size = 32 * 1024,
632 .associativity = 4,
633 .line_size = 32
634 };
635 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700636 .size = cluster_cores * 256 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000637 .associativity = 4,
638 .line_size = 128
639 };
640 break;
641 case cpuinfo_uarch_krait:
642 /*
643 * - L0 Data cache = 4 KB. 64 B/line, direct mapped [1]
644 * - L0 Instruction cache = 4 KB. [1]
645 * - L1 Data cache = 16 KB. 64 B/line, 4-way [1]
646 * - L1 Instruction cache = 16 KB, 4-way [1]
647 * - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1]
648 * - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2]
649 *
650 * [1] http://www.7-cpu.com/cpu/Krait.html
651 * [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2
652 */
653 *l1i = (struct cpuinfo_cache) {
654 .size = 16 * 1024,
655 .associativity = 4,
656 .line_size = 64 /* assume same as L1D */
657 };
658 *l1d = (struct cpuinfo_cache) {
659 .size = 16 * 1024,
660 .associativity = 4,
661 .line_size = 64
662 };
663 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700664 .size = cluster_cores * 512 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000665 .associativity = 8,
666 .line_size = 128
667 };
668 break;
669 case cpuinfo_uarch_kryo:
670 /*
671 * +-----------------+-------+-----------+-----------+-----------+-----------+
672 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
673 * +-----------------+-------+-----------+-----------+-----------+-----------+
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700674 * | Snapdragon 820 | 2+2 | ? | ? | 1M+512K | [1] |
675 * | Snapdragon 821 | 2+2 | ? | ? | 1M+512K | [1] |
Marat Dukhan3c982762017-05-08 06:16:45 +0000676 * +-----------------+-------+-----------+-----------+-----------+-----------+
677 *
678 * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2
679 */
680 *l1i = (struct cpuinfo_cache) {
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700681 .size = 32 * 1024 /* TODO: verify */,
682 .associativity = 4,
683 .line_size = 64
Marat Dukhan3c982762017-05-08 06:16:45 +0000684 };
685 *l1d = (struct cpuinfo_cache) {
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700686 .size = 24 * 1024 /* TODO: verify */,
687 .associativity = 3,
688 .line_size = 64
Marat Dukhan3c982762017-05-08 06:16:45 +0000689 };
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700690 if (midr_is_kryo_silver(midr)) {
Marat Dukhanf6254022017-06-30 18:07:11 -0700691 /* Kryo "Silver" */
692 *l2 = (struct cpuinfo_cache) {
Marat Dukhan3014efb2017-08-25 17:25:55 -0700693 .size = 512 * 1024,
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700694 .associativity = 8,
695 .line_size = 128
Marat Dukhanf6254022017-06-30 18:07:11 -0700696 };
697 } else {
698 /* Kryo "Gold" */
699 *l2 = (struct cpuinfo_cache) {
Marat Dukhan3014efb2017-08-25 17:25:55 -0700700 .size = 1024 * 1024,
Marat Dukhan54a5b4d2017-08-25 12:24:57 -0700701 .associativity = 8,
702 .line_size = 128
Marat Dukhanf6254022017-06-30 18:07:11 -0700703 };
704 }
Marat Dukhan3c982762017-05-08 06:16:45 +0000705 break;
Marat Dukhanac576322017-05-08 13:08:25 +0000706 case cpuinfo_uarch_denver:
707 /*
708 * The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 2 data cache,
709 * and a 2MB, 16-way level 2 cache, all of which can service both cores. [1]
710 *
711 * All the caches have 64-byte lines. [2]
712 *
713 * [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html
714 * [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014
715 */
716 *l1i = (struct cpuinfo_cache) {
717 .size = 128 * 1024,
718 .associativity = 4,
719 .line_size = 64
720 };
721 *l1d = (struct cpuinfo_cache) {
722 .size = 64 * 1024,
723 .associativity = 4,
724 .line_size = 64
725 };
726 *l2 = (struct cpuinfo_cache) {
727 .size = 2 * 1024 * 1024,
728 .associativity = 16,
729 .line_size = 64
730 };
731 break;
Marat Dukhan3c982762017-05-08 06:16:45 +0000732 case cpuinfo_uarch_mongoose:
733 /*
734 * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
735 * namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
736 * - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1]
737 * - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split
738 * into 4 banks and has a 22 cycle latency" [1]
739 *
740 * +--------------------+-------+-----------+-----------+-----------+-----------+
741 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
742 * +--------------------+-------+-----------+-----------+-----------+-----------+
743 * | Exynos 8 Octa 8890 | 4(+4) | 64K | 32K | 2M | [1] |
744 * | Exynos 8 Octa 8895 | 4(+4) | 64K | 32K | 2M | [2] |
745 * +--------------------+-------+-----------+-----------+-----------+-----------+
746 *
747 * [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed
748 * [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market
749 */
750 *l1i = (struct cpuinfo_cache) {
751 .size = 64 * 1024,
752 .associativity = 4,
753 .line_size = 128
754 };
755 *l1d = (struct cpuinfo_cache) {
756 .size = 32 * 1024,
757 .associativity = 8,
758 .line_size = 64
759 };
760 *l2 = (struct cpuinfo_cache) {
761 .size = 2 * 1024 * 1024,
762 .associativity = 16,
763 .line_size = 64
764 };
765 break;
Marat Dukhan92dae312017-05-09 14:10:17 +0000766 case cpuinfo_uarch_thunderx:
767 /*
768 * "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1]
769 *
770 * [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf
771 */
772 *l1i = (struct cpuinfo_cache) {
773 .size = 78 * 1024,
774 .associativity = 4 /* assumption */,
775 .line_size = 64 /* assumption */
776 };
777 *l1d = (struct cpuinfo_cache) {
778 .size = 32 * 1024,
779 .associativity = 4 /* assumption */,
780 .line_size = 64 /* assumption */
781 };
782 *l2 = (struct cpuinfo_cache) {
783 .size = 16 * 1024 * 1024,
784 .associativity = 8 /* assumption */,
785 .line_size = 64 /* assumption */
786 };
787 break;
Marat Dukhan3c982762017-05-08 06:16:45 +0000788 case cpuinfo_uarch_cortex_a12:
789 case cpuinfo_uarch_cortex_a17:
790 case cpuinfo_uarch_cortex_a32:
791 case cpuinfo_uarch_cortex_a35:
Marat Dukhan3c982762017-05-08 06:16:45 +0000792 default:
793 cpuinfo_log_warning("target uarch not recognized; using generic cache parameters");
794 /* Follow OpenBLAS */
795 if (arch_version >= 8) {
796 *l1i = (struct cpuinfo_cache) {
797 .size = 32 * 1024,
798 .associativity = 4,
799 .line_size = 64
800 };
801 *l1d = (struct cpuinfo_cache) {
802 .size = 32 * 1024,
803 .associativity = 4,
804 .line_size = 64
805 };
806 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700807 .size = cluster_cores * 256 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000808 .associativity = 8,
809 .line_size = 64
810 };
811 } else {
812 *l1i = (struct cpuinfo_cache) {
813 .size = 16 * 1024,
814 .associativity = 4,
815 .line_size = 32
816 };
817 *l1d = (struct cpuinfo_cache) {
818 .size = 16 * 1024,
819 .associativity = 4,
820 .line_size = 32
821 };
822 if (arch_version >= 7) {
823 *l2 = (struct cpuinfo_cache) {
Marat Dukhana8fb3dd2017-08-09 13:49:39 -0700824 .size = cluster_cores * 128 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000825 .associativity = 8,
826 .line_size = 32
827 };
828 }
829 }
830 break;
831 }
832 l1i->sets = l1i->size / (l1i->associativity * l1i->line_size);
833 l1i->partitions = 1;
834 l1d->sets = l1d->size / (l1d->associativity * l1d->line_size);
835 l1d->partitions = 1;
836 if (l2->size != 0) {
Marat Dukhan8ecad1a2017-05-08 07:21:57 +0000837 l2->sets = l2->size / (l2->associativity * l2->line_size);
Marat Dukhan3c982762017-05-08 06:16:45 +0000838 l2->partitions = 1;
839 }
840}