blob: b4d86305db8f7a14d0002438ee9c035d7323b9c0 [file] [log] [blame]
Marat Dukhan3c982762017-05-08 06:16:45 +00001#include <stdint.h>
2
3#include <cpuinfo.h>
4#include <log.h>
5#include <arm/api.h>
6
7
8void cpuinfo_arm_decode_cache(
9 enum cpuinfo_uarch uarch,
10 uint32_t uarch_cores,
11 uint32_t cpu_part,
12 uint32_t arch_version,
13 struct cpuinfo_cache l1i[restrict static 1],
14 struct cpuinfo_cache l1d[restrict static 1],
15 struct cpuinfo_cache l2[restrict static 1])
16{
17 switch (uarch) {
18 case cpuinfo_uarch_xscale:
19 switch (cpu_part >> 8) {
20 case 2:
21 /*
22 * PXA 210/25X/26X
23 *
24 * See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface"
25 * by David A. Patterson, John L. Hennessy
26 */
27 *l1i = (struct cpuinfo_cache) {
28 .size = 16 * 1024,
29 .associativity = 32,
30 .line_size = 32
31 };
32 *l1d = (struct cpuinfo_cache) {
33 .size = 16 * 1024,
34 .associativity = 4,
35 .line_size = 64
36 };
37 break;
38 case 4:
39 /* PXA 27X */
40 *l1i = (struct cpuinfo_cache) {
41 .size = 32 * 1024,
42 .associativity = 32,
43 .line_size = 32
44 };
45 *l1d = (struct cpuinfo_cache) {
46 .size = 32 * 1024,
47 .associativity = 32,
48 .line_size = 32
49 };
50 break;
51 case 6:
52 /*
53 * PXA 3XX
54 *
55 * See http://download.intel.com/design/intelxscale/31628302.pdf
56 */
57 *l1i = (struct cpuinfo_cache) {
58 .size = 32 * 1024,
59 .associativity = 4,
60 .line_size = 32
61 };
62 *l1d = (struct cpuinfo_cache) {
63 .size = 32 * 1024,
64 .associativity = 4,
65 .line_size = 32
66 };
67 *l2 = (struct cpuinfo_cache) {
68 .size = 256 * 1024,
69 .associativity = 8,
70 .line_size = 32
71 };
72 break;
73 }
74 break;
75 case cpuinfo_uarch_arm11:
76 *l1i = (struct cpuinfo_cache) {
77 .size = 16 * 1024,
78 .associativity = 4,
79 .line_size = 32
80 };
81 *l1d = (struct cpuinfo_cache) {
82 .size = 16 * 1024,
83 .associativity = 4,
84 .line_size = 32
85 };
86 break;
87 case cpuinfo_uarch_cortex_a5:
88 /*
89 * Cortex-A5 Technical Reference Manual:
90 * 7.1.1. Memory system
91 * The Cortex-A5 processor has separate instruction and data caches.
92 * The caches have the following features:
93 * - Data cache is 4-way set-associative.
94 * - Instruction cache is 2-way set-associative.
95 * - The cache line length is eight words.
96 * - You can configure the instruction and data caches independently during implementation
97 * to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB.
98 * 1.1.3. System design components
99 * PrimeCell Level 2 Cache Controller (PL310)
100 * The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a
101 * recognized method of improving the performance of ARM-based systems when significant memory traffic
102 * is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external
103 * memory accesses and has been optimized for use with the Cortex-A5 processor.
104 * 8.1.7. Exclusive L2 cache
105 * The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode.
106 * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller.
107 *
108 * +--------------------+-----------+-----------+----------+-----------+
109 * | Processor model | L1D cache | L1I cache | L2 cache | Reference |
110 * +--------------------+-----------+-----------+----------+-----------+
111 * | Qualcomm MSM7225A | | | | |
112 * | Qualcomm MSM7625A | | | | |
113 * | Qualcomm MSM7227A | | | | |
114 * | Qualcomm MSM7627A | 32K | 32K | 256K | Wiki [1] |
115 * | Qualcomm MSM7225AB | | | | |
116 * | Qualcomm MSM7225AB | | | | |
117 * | Qualcomm QSD8250 | | | | |
118 * | Qualcomm QSD8650 | | | | |
119 * +--------------------+-----------+-----------+----------+-----------+
120 * | Spreadtrum SC6821 | 32K | 32K | ? | |
121 * | Spreadtrum SC6825 | 32K | 32K | 256K | Wiki [2] |
122 * | Spreadtrum SC8810 | ? | ? | ? | |
123 * | Spreadtrum SC8825 | 32K | 32K | ? | |
124 * +--------------------+-----------+-----------+----------+-----------+
125 *
126 * [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1
127 * [2] https://en.wikipedia.org/wiki/Spreadtrum
128 */
129 *l1i = (struct cpuinfo_cache) {
130 .size = 32 * 1024,
131 .associativity = 2,
132 .line_size = 32
133 };
134 *l1d = (struct cpuinfo_cache) {
135 .size = 32 * 1024,
136 .associativity = 4,
137 .line_size = 32
138 };
139 *l2 = (struct cpuinfo_cache) {
140 .size = 256 * 1024,
141 /*
142 * Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size"
143 * Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf
144 */
145 .associativity = 8,
146 .line_size = 32
147 };
148 break;
149 case cpuinfo_uarch_cortex_a7:
150 /*
151 * Cortex-A7 MPCore Technical Reference Manual:
152 * 6.1. About the L1 memory system
153 * The L1 memory system consists of separate instruction and data caches. You can configure the
154 * instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB.
155 *
156 * The L1 instruction memory system has the following features:
157 * - Instruction side cache line length of 32-bytes.
158 * - 2-way set-associative instruction cache.
159 *
160 * The L1 data memory system has the following features:
161 * - Data side cache line length of 64-bytes.
162 * - 4-way set-associative data cache.
163 *
164 * 7.1. About the L2 Memory system
165 * The L2 memory system consists of an:
166 * - Optional tightly-coupled L2 cache that includes:
167 * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB.
168 *
169 * +--------------------+-------+-----------+-----------+-----------+-----------+
170 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
171 * +--------------------+-------+-----------+-----------+-----------+-----------+
172 * | Allwinner A20 | 2 | 32K | 32K | 256K | [1] |
173 * | Allwinner A23 | 2 | 32K | 32K | 256K | [2] |
174 * | Allwinner A31 | 4 | 32K | 32K | 1M | [3] |
175 * | Allwinner A31s | 4 | 32K | 32K | 1M | [4] |
176 * | Allwinner A33 | 4 | 32K | 32K | 512K | [5] |
177 * | Allwinner A80 Octa | 4(+4) | 32K | 32K | 512K(+2M) | [6] |
178 * | Allwinner A81T | 8 | 32K | 32K | 1M | [7] |
179 * +--------------------+-------+-----------+-----------+-----------+-----------+
180 * | Broadcom BCM2836 | 4 | 32K | 32K | 512K | [8] |
181 * +--------------------+-------+-----------+-----------+-----------+-----------+
182 *
183 * [1] https://linux-sunxi.org/A20
184 * [2] https://linux-sunxi.org/A23
185 * [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
186 * [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf
187 * [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf
188 * [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf
189 * [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
190 * [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428
191 */
192 *l1i = (struct cpuinfo_cache) {
193 .size = 32 * 1024,
194 .associativity = 2,
195 .line_size = 32
196 };
197 *l1d = (struct cpuinfo_cache) {
198 .size = 32 * 1024,
199 .associativity = 4,
200 .line_size = 64
201 };
202 *l2 = (struct cpuinfo_cache) {
203 .size = 128 * 1024 * uarch_cores,
204 .associativity = 8,
205 .line_size = 64
206 };
207 break;
208 case cpuinfo_uarch_cortex_a8:
209 /*
210 * Cortex-A8 Technical Reference Manual:
211 * 7.1. About the L1 memory system
212 * The L1 memory system consists of separate instruction and data caches in a Harvard arrangement.
213 * The L1 memory system provides the core with:
214 * - fixed line length of 64 bytes
215 * - support for 16KB or 32KB caches
216 * - 4-way set associative cache structure
217 * 8.1. About the L2 memory system
218 * The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache.
219 * The key features of the L2 memory system include:
220 * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB
221 * - fixed line length of 64 bytes
222 * - 8-way set associative cache structure
Marat Dukhan6d996052017-05-08 11:31:57 +0000223 *
Marat Dukhan419a8192017-05-08 12:25:17 +0000224 * +----------------------+-----------+-----------+-----------+-----------+
225 * | Processor model | L1D cache | L1I cache | L2 cache | Reference |
226 * +----------------------+-----------+-----------+-----------+-----------+
227 * | Exynos 3 Single 3110 | 32K | 32K | 512K | [1] |
228 * +----------------------+-----------+-----------+-----------+-----------+
229 * | TI DM 3730 | 32K | 32K | 256K | [2] |
230 * +----------------------+-----------+-----------+-----------+-----------+
Marat Dukhan6d996052017-05-08 11:31:57 +0000231 *
Marat Dukhan419a8192017-05-08 12:25:17 +0000232 * [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf
233 * [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf
Marat Dukhan3c982762017-05-08 06:16:45 +0000234 */
235 *l1i = (struct cpuinfo_cache) {
Marat Dukhan6d996052017-05-08 11:31:57 +0000236 .size = 32 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000237 .associativity = 4,
238 .line_size = 64
239 };
240 *l1d = (struct cpuinfo_cache) {
Marat Dukhan6d996052017-05-08 11:31:57 +0000241 .size = 32 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000242 .associativity = 4,
243 .line_size = 64
244 };
245 *l2 = (struct cpuinfo_cache) {
Marat Dukhan6d996052017-05-08 11:31:57 +0000246 .size = 256 * 1024,
Marat Dukhan3c982762017-05-08 06:16:45 +0000247 .associativity = 8,
248 .line_size = 64
249 };
250 break;
251 case cpuinfo_uarch_cortex_a9:
252 /*
253 * ARM Cortex‑A9 Technical Reference Manual:
254 * 7.1.1 Memory system
255 * The Cortex‑A9 processor has separate instruction and data caches.
256 * The caches have the following features:
257 * - Both caches are 4-way set-associative.
258 * - The cache line length is eight words.
259 * - You can configure the instruction and data caches independently during implementation
260 * to sizes of 16KB, 32KB, or 64KB.
261 * 8.1.5 Exclusive L2 cache
262 * The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode.
263 * This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller.
264 *
265 * +--------------------+-------+-----------+-----------+-----------+-----------+
266 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
267 * +--------------------+-------+-----------+-----------+-----------+-----------+
268 * | Exynos 4 Dual 4210 | 2 | 32K | 32K | 1M | [1] |
269 * | Exynos 4 Dual 4212 | 2 | 32K | 32K | 1M | [2] |
270 * | Exynos 4 Quad 4412 | 4 | 32K | 32K | 1M | [3] |
271 * | Exynos 4 Quad 4415 | 4 | 32K | 32K | 1M | |
272 * +--------------------+-------+-----------+-----------+-----------+-----------+
273 *
274 * [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf
275 * [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf
276 * [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf
277 */
278
279 /* Use Exynos 4 specs */
280 *l1i = (struct cpuinfo_cache) {
281 .size = 32 * 1024,
282 .associativity = 4,
283 .line_size = 32
284 };
285 *l1d = (struct cpuinfo_cache) {
286 .size = 32 * 1024,
287 .associativity = 4,
288 .line_size = 32
289 };
290 *l2 = (struct cpuinfo_cache) {
291 .size = 1024 * 1024,
Marat Dukhan1b5c6ba2017-05-08 10:35:27 +0000292 /* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */
293 .associativity = 16,
Marat Dukhan3c982762017-05-08 06:16:45 +0000294 .line_size = 32
295 };
296 break;
297 case cpuinfo_uarch_cortex_a15:
298 /*
299 * 6.1. About the L1 memory system
300 * The L1 memory system consists of separate instruction and data caches.
301 * The L1 instruction memory system has the following features:
302 * - 32KB 2-way set-associative instruction cache.
303 * - Fixed line length of 64 bytes.
304 * The L1 data memory system has the following features:
305 * - 32KB 2-way set-associative data cache.
306 * - Fixed line length of 64 bytes.
307 * 7.1. About the L2 memory system
308 * The features of the L2 memory system include:
309 * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB.
310 * - Fixed line length of 64 bytes.
311 * - 16-way set-associative cache structure.
312 *
313 * +--------------------+-------+-----------+-----------+-----------+-----------+
314 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
315 * +--------------------+-------+-----------+-----------+-----------+-----------+
316 * | Exynos 5 Dual 5250 | 2 | 32K | 32K | 1M | [1] |
317 * | Exynos 5 Hexa 5260 | 2(+4) | 32K | 32K | 1M(+512K) | [2] |
318 * | Exynos 5 Octa 5410 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
319 * | Exynos 5 Octa 5420 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
320 * | Exynos 5 Octa 5422 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
321 * | Exynos 5 Octa 5430 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
322 * | Exynos 5 Octa 5800 | 4(+4) | 32K | 32K | 2M(+512K) | [3] |
323 * +--------------------+-------+-----------+-----------+-----------+-----------+
324 *
325 * [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf
326 * [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf
327 * [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13
328 */
329 *l1i = (struct cpuinfo_cache) {
330 .size = 32 * 1024,
331 .associativity = 2,
332 .line_size = 64
333 };
334 *l1d = (struct cpuinfo_cache) {
335 .size = 32 * 1024,
336 .associativity = 2,
337 .line_size = 64
338 };
339 *l2 = (struct cpuinfo_cache) {
340 .size = uarch_cores * 512 * 1024,
341 .associativity = 16,
342 .line_size = 64
343 };
344 break;
Marat Dukhanee705c72017-05-08 10:18:03 +0000345 case cpuinfo_uarch_cortex_a53:
346 /*
347 * ARM Cortex-A53 MPCore Processor Technical Reference Manual:
348 * 6.1. About the L1 memory system
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000349 * The L1 memory system consists of separate instruction and data caches. The implementer configures the
Marat Dukhanee705c72017-05-08 10:18:03 +0000350 * instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB.
351 *
352 * The L1 Instruction memory system has the following key features:
353 * - Instruction side cache line length of 64 bytes.
354 * - 2-way set associative L1 Instruction cache.
355 *
356 * The L1 Data memory system has the following features:
357 * - Data side cache line length of 64 bytes.
358 * - 4-way set associative L1 Data cache.
359 *
360 * 7.1. About the L2 memory system
361 * The L2 memory system consists of an:
362 * - Optional tightly-coupled L2 cache that includes:
363 * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB.
364 * - Fixed line length of 64 bytes.
365 * - 16-way set-associative cache structure.
366 *
367 * +--------------------+-------+-----------+-----------+-----------+-----------+
368 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
369 * +--------------------+-------+-----------+-----------+-----------+-----------+
370 * | Broadcom BCM2837 | 4 | 16K | 16K | 512K | [1] |
Marat Dukhan4780ba52017-06-30 18:43:01 -0700371 * | Snapdragon 835 | 4(+4) | 32K+64K | 32K+64K | 1M(+2M) | sysfs |
Marat Dukhanee705c72017-05-08 10:18:03 +0000372 * +--------------------+-------+-----------+-----------+-----------+-----------+
373 *
374 * [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766
375 */
Marat Dukhanf6254022017-06-30 18:07:11 -0700376 if (cpu_part == 0x800) {
377 /* Little cores of Snapdragon 835 */
378 *l1i = (struct cpuinfo_cache) {
379 .size = 32 * 1024,
380 .associativity = 2,
381 .line_size = 64
382 };
383 *l1d = (struct cpuinfo_cache) {
384 .size = 32 * 1024,
385 .associativity = 4,
386 .line_size = 64
387 };
388 *l2 = (struct cpuinfo_cache) {
389 .size = uarch_cores * 256 * 1024,
390 .associativity = 16,
391 .line_size = 64
392 };
393 } else {
394 /* Standard Cortex-A53 */
395 *l1i = (struct cpuinfo_cache) {
396 .size = 16 * 1024,
397 .associativity = 2,
398 .line_size = 64
399 };
400 *l1d = (struct cpuinfo_cache) {
401 .size = 16 * 1024,
402 .associativity = 4,
403 .line_size = 64
404 };
405 *l2 = (struct cpuinfo_cache) {
406 .size = uarch_cores * 128 * 1024,
407 .associativity = 16,
408 .line_size = 64
409 };
410 }
Marat Dukhanee705c72017-05-08 10:18:03 +0000411 break;
Marat Dukhanc3035ec2017-05-08 11:08:49 +0000412 case cpuinfo_uarch_cortex_a57:
413 /*
414 * ARM Cortex-A57 MPCore Processor Technical Reference Manual:
415 * 6.1. About the L1 memory system
416 * The L1 memory system consists of separate instruction and data caches.
417 *
418 * The L1 instruction memory system has the following features:
419 * - 48KB 3-way set-associative instruction cache.
420 * - Fixed line length of 64 bytes.
421 *
422 * The L1 data memory system has the following features:
423 * - 32KB 2-way set-associative data cache.
424 * - Fixed line length of 64 bytes.
425 *
426 * 7.1 About the L2 memory system
427 * The features of the L2 memory system include:
428 * - Configurable L2 cache size of 512KB, 1MB, and 2MB.
429 * - Fixed line length of 64 bytes.
430 * - 16-way set-associative cache structure.
431 * - Inclusion property with L1 data caches.
432 *
433 * +--------------------+-------+-----------+-----------+-----------+-----------+
434 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
435 * +--------------------+-------+-----------+-----------+-----------+-----------+
436 * | Jetson TX1 | 4 | 32K | 48K | 2M | [1] |
437 * +--------------------+-------+-----------+-----------+-----------+-----------+
438 *
439 * [1] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/
440 */
441 *l1i = (struct cpuinfo_cache) {
442 .size = 48 * 1024,
443 .associativity = 3,
444 .line_size = 64
445 };
446 *l1d = (struct cpuinfo_cache) {
447 .size = 32 * 1024,
448 .associativity = 2,
449 .line_size = 64
450 };
451 *l2 = (struct cpuinfo_cache) {
452 .size = uarch_cores * 512 * 1024,
453 .associativity = 16,
454 .line_size = 64,
455 .flags = CPUINFO_CACHE_INCLUSIVE
456 };
457 break;
Marat Dukhan4780ba52017-06-30 18:43:01 -0700458 case cpuinfo_uarch_cortex_a73:
459 /*
460 * ARM Cortex‑A73 MPCore Processor Technical Reference Manual
461 * 6.1. About the L1 memory system
462 * The L1 memory system consists of separate instruction and data caches.
463 * The size of the instruction cache is 64KB.
464 * The size of the data cache is configurable to either 32KB or 64KB.
465 *
466 * The L1 instruction memory system has the following key features:
467 * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache.
468 * - Fixed cache line length of 64 bytes.
469 *
470 * The L1 data memory system has the following features:
471 * - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations)
472 * and a 16-way set associative PIPT cache (for 64KB configurations).
473 * - Fixed cache line length of 64 bytes.
474 *
475 * 7.1 About the L2 memory system
476 * The L2 memory system consists of:
477 * - A tightly-integrated L2 cache with:
478 * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB.
479 * - A 16-way, set-associative structure.
480 * - A fixed line length of 64 bytes.
481 *
482 * The ARM Cortex A73 - Artemis Unveiled [1]
483 * "ARM still envisions that most vendors will choose to use configurations of 1 to
484 * 2MB in consumer products. The L2 cache is inclusive of the L1 cache. "
485 *
486 * +---------------------+---------+-----------+-----------+-----------+-----------+
487 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
488 * +---------------------+---------+-----------+-----------+-----------+-----------+
489 * | HiSilicon Kirin 960 | 4(+4) | 64K+32K | 64K+32K | ? | [2] |
490 * | MediaTek Helio X30 | 2(+4+4) | ? | ? | ? | |
491 * | Snapdragon 835 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | sysfs |
492 * +---------------------+---------+-----------+-----------+-----------+-----------+
493 *
494 * [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2
495 * [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3
496 */
497 *l1i = (struct cpuinfo_cache) {
498 .size = 64 * 1024,
499 .associativity = 4,
500 .line_size = 64
501 };
502 *l1d = (struct cpuinfo_cache) {
503 .size = 64 * 1024,
504 .associativity = 16,
505 .line_size = 64
506 };
507 *l2 = (struct cpuinfo_cache) {
508 .size = uarch_cores * 512 * 1024,
509 .associativity = 16,
510 .line_size = 64,
511 .flags = CPUINFO_CACHE_INCLUSIVE
512 };
513 break;
Marat Dukhan3c982762017-05-08 06:16:45 +0000514 case cpuinfo_uarch_scorpion:
515 /*
516 * - "The CPU includes 32KB instruction and data caches as
517 * well as a complete memory-management unit (MMU) suitable
518 * for high-level operating systems. The CPU also has
519 * 256KB of SRAM that can be allocated in 64KB increments
520 * to level-two (L2) cache or tightly coupled memory (TCM)." [1]
521 * We interpret it as L2 cache being 4-way set-associative on single-core Scorpion.
522 * - L1 Data Cache = 32 KB. 32 B/line. [2]
523 * - L2 Cache = 256 KB. 128 B/line. [2]
524 * - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3]
525 * - Single or dual-core configuration [3]
526 * - For L1 cache assume the same associativity as Krait
527 *
528 * [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf
529 * [2] http://www.7-cpu.com/cpu/Snapdragon.html
530 * [3] https://en.wikipedia.org/wiki/Scorpion_(CPU)
531 */
532 *l1i = (struct cpuinfo_cache) {
533 .size = 32 * 1024,
534 .associativity = 4,
535 .line_size = 32
536 };
537 *l1d = (struct cpuinfo_cache) {
538 .size = 32 * 1024,
539 .associativity = 4,
540 .line_size = 32
541 };
542 *l2 = (struct cpuinfo_cache) {
543 .size = uarch_cores * 256 * 1024,
544 .associativity = 4,
545 .line_size = 128
546 };
547 break;
548 case cpuinfo_uarch_krait:
549 /*
550 * - L0 Data cache = 4 KB. 64 B/line, direct mapped [1]
551 * - L0 Instruction cache = 4 KB. [1]
552 * - L1 Data cache = 16 KB. 64 B/line, 4-way [1]
553 * - L1 Instruction cache = 16 KB, 4-way [1]
554 * - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1]
555 * - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2]
556 *
557 * [1] http://www.7-cpu.com/cpu/Krait.html
558 * [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2
559 */
560 *l1i = (struct cpuinfo_cache) {
561 .size = 16 * 1024,
562 .associativity = 4,
563 .line_size = 64 /* assume same as L1D */
564 };
565 *l1d = (struct cpuinfo_cache) {
566 .size = 16 * 1024,
567 .associativity = 4,
568 .line_size = 64
569 };
570 *l2 = (struct cpuinfo_cache) {
571 .size = uarch_cores * 512 * 1024,
572 .associativity = 8,
573 .line_size = 128
574 };
575 break;
576 case cpuinfo_uarch_kryo:
577 /*
578 * +-----------------+-------+-----------+-----------+-----------+-----------+
579 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
580 * +-----------------+-------+-----------+-----------+-----------+-----------+
581 * | Snapdragon 820 | 2+2 | 32K | 32K | 1M+512K | [1] |
582 * | Snapdragon 821 | 2+2 | 32K | 32K | 1M+512K | [1] |
Marat Dukhan3c982762017-05-08 06:16:45 +0000583 * +-----------------+-------+-----------+-----------+-----------+-----------+
584 *
585 * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2
586 */
587 *l1i = (struct cpuinfo_cache) {
588 .size = 32 * 1024,
589 .associativity = 4 /* assume same as Krait */,
590 .line_size = 64 /* assume same as Krait */
591 };
592 *l1d = (struct cpuinfo_cache) {
593 .size = 32 * 1024,
594 .associativity = 4 /* assume same as Krait */,
595 .line_size = 64 /* assume same as Krait */
596 };
Marat Dukhanf6254022017-06-30 18:07:11 -0700597 if (cpu_part == 0x205) {
598 /* Kryo "Silver" */
599 *l2 = (struct cpuinfo_cache) {
600 .size = uarch_cores * 256 * 1024,
601 .associativity = 8 /* assume same as Krait */
602 .line_size = 64 /* assume same as Krait */
603 };
604 } else {
605 /* Kryo "Gold" */
606 *l2 = (struct cpuinfo_cache) {
607 .size = uarch_cores * 512 * 1024,
608 .associativity = 8 /* assume same as Krait */
609 .line_size = 64 /* assume same as Krait */
610 };
611 }
Marat Dukhan3c982762017-05-08 06:16:45 +0000612 break;
Marat Dukhanac576322017-05-08 13:08:25 +0000613 case cpuinfo_uarch_denver:
614 /*
615 * The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 2 data cache,
616 * and a 2MB, 16-way level 2 cache, all of which can service both cores. [1]
617 *
618 * All the caches have 64-byte lines. [2]
619 *
620 * [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html
621 * [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014
622 */
623 *l1i = (struct cpuinfo_cache) {
624 .size = 128 * 1024,
625 .associativity = 4,
626 .line_size = 64
627 };
628 *l1d = (struct cpuinfo_cache) {
629 .size = 64 * 1024,
630 .associativity = 4,
631 .line_size = 64
632 };
633 *l2 = (struct cpuinfo_cache) {
634 .size = 2 * 1024 * 1024,
635 .associativity = 16,
636 .line_size = 64
637 };
638 break;
Marat Dukhan3c982762017-05-08 06:16:45 +0000639 case cpuinfo_uarch_mongoose:
640 /*
641 * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$,
642 * namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1]
643 * - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1]
644 * - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split
645 * into 4 banks and has a 22 cycle latency" [1]
646 *
647 * +--------------------+-------+-----------+-----------+-----------+-----------+
648 * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference |
649 * +--------------------+-------+-----------+-----------+-----------+-----------+
650 * | Exynos 8 Octa 8890 | 4(+4) | 64K | 32K | 2M | [1] |
651 * | Exynos 8 Octa 8895 | 4(+4) | 64K | 32K | 2M | [2] |
652 * +--------------------+-------+-----------+-----------+-----------+-----------+
653 *
654 * [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed
655 * [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market
656 */
657 *l1i = (struct cpuinfo_cache) {
658 .size = 64 * 1024,
659 .associativity = 4,
660 .line_size = 128
661 };
662 *l1d = (struct cpuinfo_cache) {
663 .size = 32 * 1024,
664 .associativity = 8,
665 .line_size = 64
666 };
667 *l2 = (struct cpuinfo_cache) {
668 .size = 2 * 1024 * 1024,
669 .associativity = 16,
670 .line_size = 64
671 };
672 break;
Marat Dukhan92dae312017-05-09 14:10:17 +0000673 case cpuinfo_uarch_thunderx:
674 /*
675 * "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1]
676 *
677 * [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf
678 */
679 *l1i = (struct cpuinfo_cache) {
680 .size = 78 * 1024,
681 .associativity = 4 /* assumption */,
682 .line_size = 64 /* assumption */
683 };
684 *l1d = (struct cpuinfo_cache) {
685 .size = 32 * 1024,
686 .associativity = 4 /* assumption */,
687 .line_size = 64 /* assumption */
688 };
689 *l2 = (struct cpuinfo_cache) {
690 .size = 16 * 1024 * 1024,
691 .associativity = 8 /* assumption */,
692 .line_size = 64 /* assumption */
693 };
694 break;
Marat Dukhan3c982762017-05-08 06:16:45 +0000695 case cpuinfo_uarch_cortex_a12:
696 case cpuinfo_uarch_cortex_a17:
697 case cpuinfo_uarch_cortex_a32:
698 case cpuinfo_uarch_cortex_a35:
Marat Dukhan3c982762017-05-08 06:16:45 +0000699 case cpuinfo_uarch_cortex_a72:
700 case cpuinfo_uarch_cortex_a73:
701 default:
702 cpuinfo_log_warning("target uarch not recognized; using generic cache parameters");
703 /* Follow OpenBLAS */
704 if (arch_version >= 8) {
705 *l1i = (struct cpuinfo_cache) {
706 .size = 32 * 1024,
707 .associativity = 4,
708 .line_size = 64
709 };
710 *l1d = (struct cpuinfo_cache) {
711 .size = 32 * 1024,
712 .associativity = 4,
713 .line_size = 64
714 };
715 *l2 = (struct cpuinfo_cache) {
716 .size = uarch_cores * 256 * 1024,
717 .associativity = 8,
718 .line_size = 64
719 };
720 } else {
721 *l1i = (struct cpuinfo_cache) {
722 .size = 16 * 1024,
723 .associativity = 4,
724 .line_size = 32
725 };
726 *l1d = (struct cpuinfo_cache) {
727 .size = 16 * 1024,
728 .associativity = 4,
729 .line_size = 32
730 };
731 if (arch_version >= 7) {
732 *l2 = (struct cpuinfo_cache) {
733 .size = uarch_cores * 128 * 1024,
734 .associativity = 8,
735 .line_size = 32
736 };
737 }
738 }
739 break;
740 }
741 l1i->sets = l1i->size / (l1i->associativity * l1i->line_size);
742 l1i->partitions = 1;
743 l1d->sets = l1d->size / (l1d->associativity * l1d->line_size);
744 l1d->partitions = 1;
745 if (l2->size != 0) {
Marat Dukhan8ecad1a2017-05-08 07:21:57 +0000746 l2->sets = l2->size / (l2->associativity * l2->line_size);
Marat Dukhan3c982762017-05-08 06:16:45 +0000747 l2->partitions = 1;
748 }
749}