Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 1 | #include <stdint.h> |
| 2 | |
| 3 | #include <cpuinfo.h> |
| 4 | #include <log.h> |
| 5 | #include <arm/api.h> |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 6 | #include <arm/midr.h> |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 7 | |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 8 | void cpuinfo_arm_decode_cache( |
| 9 | enum cpuinfo_uarch uarch, |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 10 | uint32_t cluster_cores, |
| 11 | uint32_t midr, |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 12 | const struct cpuinfo_arm_chipset chipset[restrict static 1], |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 13 | uint32_t cluster_id, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 14 | uint32_t arch_version, |
| 15 | struct cpuinfo_cache l1i[restrict static 1], |
| 16 | struct cpuinfo_cache l1d[restrict static 1], |
| 17 | struct cpuinfo_cache l2[restrict static 1]) |
| 18 | { |
| 19 | switch (uarch) { |
| 20 | case cpuinfo_uarch_xscale: |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 21 | switch (midr_get_part(midr) >> 8) { |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 22 | case 2: |
| 23 | /* |
| 24 | * PXA 210/25X/26X |
| 25 | * |
| 26 | * See "Computer Organization and Design, Revised Printing: The Hardware/Software Interface" |
| 27 | * by David A. Patterson, John L. Hennessy |
| 28 | */ |
| 29 | *l1i = (struct cpuinfo_cache) { |
| 30 | .size = 16 * 1024, |
| 31 | .associativity = 32, |
| 32 | .line_size = 32 |
| 33 | }; |
| 34 | *l1d = (struct cpuinfo_cache) { |
| 35 | .size = 16 * 1024, |
| 36 | .associativity = 4, |
| 37 | .line_size = 64 |
| 38 | }; |
| 39 | break; |
| 40 | case 4: |
| 41 | /* PXA 27X */ |
| 42 | *l1i = (struct cpuinfo_cache) { |
| 43 | .size = 32 * 1024, |
| 44 | .associativity = 32, |
| 45 | .line_size = 32 |
| 46 | }; |
| 47 | *l1d = (struct cpuinfo_cache) { |
| 48 | .size = 32 * 1024, |
| 49 | .associativity = 32, |
| 50 | .line_size = 32 |
| 51 | }; |
| 52 | break; |
| 53 | case 6: |
| 54 | /* |
| 55 | * PXA 3XX |
| 56 | * |
| 57 | * See http://download.intel.com/design/intelxscale/31628302.pdf |
| 58 | */ |
| 59 | *l1i = (struct cpuinfo_cache) { |
| 60 | .size = 32 * 1024, |
| 61 | .associativity = 4, |
| 62 | .line_size = 32 |
| 63 | }; |
| 64 | *l1d = (struct cpuinfo_cache) { |
| 65 | .size = 32 * 1024, |
| 66 | .associativity = 4, |
| 67 | .line_size = 32 |
| 68 | }; |
| 69 | *l2 = (struct cpuinfo_cache) { |
| 70 | .size = 256 * 1024, |
| 71 | .associativity = 8, |
| 72 | .line_size = 32 |
| 73 | }; |
| 74 | break; |
| 75 | } |
| 76 | break; |
| 77 | case cpuinfo_uarch_arm11: |
| 78 | *l1i = (struct cpuinfo_cache) { |
| 79 | .size = 16 * 1024, |
| 80 | .associativity = 4, |
| 81 | .line_size = 32 |
| 82 | }; |
| 83 | *l1d = (struct cpuinfo_cache) { |
| 84 | .size = 16 * 1024, |
| 85 | .associativity = 4, |
| 86 | .line_size = 32 |
| 87 | }; |
| 88 | break; |
| 89 | case cpuinfo_uarch_cortex_a5: |
| 90 | /* |
| 91 | * Cortex-A5 Technical Reference Manual: |
| 92 | * 7.1.1. Memory system |
| 93 | * The Cortex-A5 processor has separate instruction and data caches. |
| 94 | * The caches have the following features: |
| 95 | * - Data cache is 4-way set-associative. |
| 96 | * - Instruction cache is 2-way set-associative. |
| 97 | * - The cache line length is eight words. |
| 98 | * - You can configure the instruction and data caches independently during implementation |
| 99 | * to sizes of 4KB, 8KB, 16KB, 32KB, or 64KB. |
| 100 | * 1.1.3. System design components |
| 101 | * PrimeCell Level 2 Cache Controller (PL310) |
| 102 | * The addition of an on-chip secondary cache, also referred to as a Level 2 or L2 cache, is a |
| 103 | * recognized method of improving the performance of ARM-based systems when significant memory traffic |
| 104 | * is generated by the processor. The PrimeCell Level 2 Cache Controller reduces the number of external |
| 105 | * memory accesses and has been optimized for use with the Cortex-A5 processor. |
| 106 | * 8.1.7. Exclusive L2 cache |
| 107 | * The Cortex-A5 processor can be connected to an L2 cache that supports an exclusive cache mode. |
| 108 | * This mode must be activated both in the Cortex-A5 processor and in the L2 cache controller. |
| 109 | * |
| 110 | * +--------------------+-----------+-----------+----------+-----------+ |
| 111 | * | Processor model | L1D cache | L1I cache | L2 cache | Reference | |
| 112 | * +--------------------+-----------+-----------+----------+-----------+ |
| 113 | * | Qualcomm MSM7225A | | | | | |
| 114 | * | Qualcomm MSM7625A | | | | | |
| 115 | * | Qualcomm MSM7227A | | | | | |
| 116 | * | Qualcomm MSM7627A | 32K | 32K | 256K | Wiki [1] | |
| 117 | * | Qualcomm MSM7225AB | | | | | |
| 118 | * | Qualcomm MSM7225AB | | | | | |
| 119 | * | Qualcomm QSD8250 | | | | | |
| 120 | * | Qualcomm QSD8650 | | | | | |
| 121 | * +--------------------+-----------+-----------+----------+-----------+ |
| 122 | * | Spreadtrum SC6821 | 32K | 32K | ? | | |
| 123 | * | Spreadtrum SC6825 | 32K | 32K | 256K | Wiki [2] | |
| 124 | * | Spreadtrum SC8810 | ? | ? | ? | | |
| 125 | * | Spreadtrum SC8825 | 32K | 32K | ? | | |
| 126 | * +--------------------+-----------+-----------+----------+-----------+ |
| 127 | * |
| 128 | * [1] https://en.wikipedia.org/wiki/List_of_Qualcomm_Snapdragon_systems-on-chip#Snapdragon_S1 |
| 129 | * [2] https://en.wikipedia.org/wiki/Spreadtrum |
| 130 | */ |
| 131 | *l1i = (struct cpuinfo_cache) { |
| 132 | .size = 32 * 1024, |
| 133 | .associativity = 2, |
| 134 | .line_size = 32 |
| 135 | }; |
| 136 | *l1d = (struct cpuinfo_cache) { |
| 137 | .size = 32 * 1024, |
| 138 | .associativity = 4, |
| 139 | .line_size = 32 |
| 140 | }; |
| 141 | *l2 = (struct cpuinfo_cache) { |
| 142 | .size = 256 * 1024, |
| 143 | /* |
| 144 | * Follow NXP specification: "Eight-way set-associative 512 kB L2 cache with 32B line size" |
| 145 | * Reference: http://www.nxp.com/assets/documents/data/en/application-notes/AN4947.pdf |
| 146 | */ |
| 147 | .associativity = 8, |
| 148 | .line_size = 32 |
| 149 | }; |
| 150 | break; |
| 151 | case cpuinfo_uarch_cortex_a7: |
| 152 | /* |
| 153 | * Cortex-A7 MPCore Technical Reference Manual: |
| 154 | * 6.1. About the L1 memory system |
| 155 | * The L1 memory system consists of separate instruction and data caches. You can configure the |
| 156 | * instruction and data caches independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB. |
| 157 | * |
| 158 | * The L1 instruction memory system has the following features: |
| 159 | * - Instruction side cache line length of 32-bytes. |
| 160 | * - 2-way set-associative instruction cache. |
| 161 | * |
| 162 | * The L1 data memory system has the following features: |
| 163 | * - Data side cache line length of 64-bytes. |
| 164 | * - 4-way set-associative data cache. |
| 165 | * |
| 166 | * 7.1. About the L2 Memory system |
| 167 | * The L2 memory system consists of an: |
| 168 | * - Optional tightly-coupled L2 cache that includes: |
| 169 | * - Configurable L2 cache size of 128KB, 256KB, 512KB, and 1MB. |
Marat Dukhan | 36b969e | 2017-09-10 21:54:32 -0700 | [diff] [blame] | 170 | * - Fixed line length of 64 bytes |
| 171 | * - 8-way set-associative cache structure |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 172 | * |
| 173 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 174 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 175 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 176 | * | Allwinner A20 | 2 | 32K | 32K | 256K | [1] | |
| 177 | * | Allwinner A23 | 2 | 32K | 32K | 256K | [2] | |
| 178 | * | Allwinner A31 | 4 | 32K | 32K | 1M | [3] | |
| 179 | * | Allwinner A31s | 4 | 32K | 32K | 1M | [4] | |
| 180 | * | Allwinner A33 | 4 | 32K | 32K | 512K | [5] | |
| 181 | * | Allwinner A80 Octa | 4(+4) | 32K | 32K | 512K(+2M) | [6] | |
| 182 | * | Allwinner A83T | 8 | 32K | 32K | 1M | [7] | |
| 183 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 184 | * | Broadcom BCM2836 | 4 | 32K | 32K | 512K | [8] | |
| 185 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 186 | * |
| 187 | * [1] https://linux-sunxi.org/A20 |
| 188 | * [2] https://linux-sunxi.org/A23 |
| 189 | * [3] http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf |
| 190 | * [4] https://github.com/allwinner-zh/documents/blob/master/A31s/A31s_Datasheet_v1.5_20150510.pdf |
| 191 | * [5] http://dl.linux-sunxi.org/A33/A33_Datasheet_release1.0.pdf |
| 192 | * [6] https://linux-sunxi.org/images/1/10/A80_Datasheet_Revision_1.0_0404.pdf |
| 193 | * [7] http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf |
| 194 | * [8] https://www.raspberrypi.org/forums/viewtopic.php?t=98428 |
| 195 | */ |
| 196 | *l1i = (struct cpuinfo_cache) { |
| 197 | .size = 32 * 1024, |
| 198 | .associativity = 2, |
| 199 | .line_size = 32 |
| 200 | }; |
| 201 | *l1d = (struct cpuinfo_cache) { |
| 202 | .size = 32 * 1024, |
| 203 | .associativity = 4, |
| 204 | .line_size = 64 |
| 205 | }; |
| 206 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 207 | .size = 128 * 1024 * cluster_cores, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 208 | .associativity = 8, |
| 209 | .line_size = 64 |
| 210 | }; |
| 211 | break; |
| 212 | case cpuinfo_uarch_cortex_a8: |
| 213 | /* |
| 214 | * Cortex-A8 Technical Reference Manual: |
| 215 | * 7.1. About the L1 memory system |
| 216 | * The L1 memory system consists of separate instruction and data caches in a Harvard arrangement. |
| 217 | * The L1 memory system provides the core with: |
| 218 | * - fixed line length of 64 bytes |
| 219 | * - support for 16KB or 32KB caches |
| 220 | * - 4-way set associative cache structure |
| 221 | * 8.1. About the L2 memory system |
| 222 | * The L2 memory system is tightly coupled to the L1 data cache and L1 instruction cache. |
| 223 | * The key features of the L2 memory system include: |
| 224 | * - configurable cache size of 0KB, 128KB, 256KB, 512KB, and 1MB |
| 225 | * - fixed line length of 64 bytes |
| 226 | * - 8-way set associative cache structure |
Marat Dukhan | 6d99605 | 2017-05-08 11:31:57 +0000 | [diff] [blame] | 227 | * |
Marat Dukhan | 419a819 | 2017-05-08 12:25:17 +0000 | [diff] [blame] | 228 | * +----------------------+-----------+-----------+-----------+-----------+ |
| 229 | * | Processor model | L1D cache | L1I cache | L2 cache | Reference | |
| 230 | * +----------------------+-----------+-----------+-----------+-----------+ |
| 231 | * | Exynos 3 Single 3110 | 32K | 32K | 512K | [1] | |
| 232 | * +----------------------+-----------+-----------+-----------+-----------+ |
| 233 | * | TI DM 3730 | 32K | 32K | 256K | [2] | |
| 234 | * +----------------------+-----------+-----------+-----------+-----------+ |
Marat Dukhan | 6d99605 | 2017-05-08 11:31:57 +0000 | [diff] [blame] | 235 | * |
Marat Dukhan | 419a819 | 2017-05-08 12:25:17 +0000 | [diff] [blame] | 236 | * [1] https://en.wikichip.org/w/images/0/04/Exynos_3110.pdf |
| 237 | * [2] https://www.ti.com/lit/ds/symlink/dm3725.pdf |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 238 | */ |
| 239 | *l1i = (struct cpuinfo_cache) { |
Marat Dukhan | 6d99605 | 2017-05-08 11:31:57 +0000 | [diff] [blame] | 240 | .size = 32 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 241 | .associativity = 4, |
| 242 | .line_size = 64 |
| 243 | }; |
| 244 | *l1d = (struct cpuinfo_cache) { |
Marat Dukhan | 6d99605 | 2017-05-08 11:31:57 +0000 | [diff] [blame] | 245 | .size = 32 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 246 | .associativity = 4, |
| 247 | .line_size = 64 |
| 248 | }; |
| 249 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 250 | .associativity = 8, |
| 251 | .line_size = 64 |
| 252 | }; |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 253 | switch (chipset->vendor) { |
| 254 | case cpuinfo_arm_chipset_vendor_samsung: |
| 255 | l2->size = 512 * 1024; |
| 256 | break; |
| 257 | default: |
| 258 | l2->size = 256 * 1024; |
| 259 | break; |
| 260 | } |
| 261 | |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 262 | break; |
| 263 | case cpuinfo_uarch_cortex_a9: |
| 264 | /* |
| 265 | * ARM Cortex‑A9 Technical Reference Manual: |
| 266 | * 7.1.1 Memory system |
| 267 | * The Cortex‑A9 processor has separate instruction and data caches. |
| 268 | * The caches have the following features: |
| 269 | * - Both caches are 4-way set-associative. |
| 270 | * - The cache line length is eight words. |
| 271 | * - You can configure the instruction and data caches independently during implementation |
| 272 | * to sizes of 16KB, 32KB, or 64KB. |
| 273 | * 8.1.5 Exclusive L2 cache |
| 274 | * The Cortex‑A9 processor can be connected to an L2 cache that supports an exclusive cache mode. |
| 275 | * This mode must be activated both in the Cortex‑A9 processor and in the L2 cache controller. |
| 276 | * |
| 277 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 278 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 279 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 280 | * | Exynos 4 Dual 4210 | 2 | 32K | 32K | 1M | [1] | |
| 281 | * | Exynos 4 Dual 4212 | 2 | 32K | 32K | 1M | [2] | |
| 282 | * | Exynos 4 Quad 4412 | 4 | 32K | 32K | 1M | [3] | |
| 283 | * | Exynos 4 Quad 4415 | 4 | 32K | 32K | 1M | | |
Marat Dukhan | 058589b | 2017-09-17 11:15:26 -0700 | [diff] [blame] | 284 | * | TI OMAP 4430 | 2 | 32K | 32K | 1M | [4] | |
| 285 | * | TI OMAP 4460 | 2 | 32K | 32K | 1M | [5] | |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 286 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 287 | * |
| 288 | * [1] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_45nm_User_Manaul_Public_REV1.00-0.pdf |
| 289 | * [2] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Dual_32nm_User_Manaul_Public_REV100-0.pdf |
| 290 | * [3] http://www.samsung.com/global/business/semiconductor/file/product/Exynos_4_Quad_User_Manaul_Public_REV1.00-0.pdf |
Marat Dukhan | 058589b | 2017-09-17 11:15:26 -0700 | [diff] [blame] | 291 | * [4] https://www.hotchips.org/wp-content/uploads/hc_archives/hc21/2_mon/HC21.24.400.ClientProcessors-Epub/HC21.24.421.Witt-OMAP4430.pdf |
| 292 | * [5] http://www.anandtech.com/show/5310/samsung-galaxy-nexus-ice-cream-sandwich-review/9 |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 293 | */ |
| 294 | |
| 295 | /* Use Exynos 4 specs */ |
| 296 | *l1i = (struct cpuinfo_cache) { |
| 297 | .size = 32 * 1024, |
| 298 | .associativity = 4, |
| 299 | .line_size = 32 |
| 300 | }; |
| 301 | *l1d = (struct cpuinfo_cache) { |
| 302 | .size = 32 * 1024, |
| 303 | .associativity = 4, |
| 304 | .line_size = 32 |
| 305 | }; |
| 306 | *l2 = (struct cpuinfo_cache) { |
| 307 | .size = 1024 * 1024, |
Marat Dukhan | 1b5c6ba | 2017-05-08 10:35:27 +0000 | [diff] [blame] | 308 | /* OMAP4460 in Pandaboard ES has 16-way set-associative L2 cache */ |
| 309 | .associativity = 16, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 310 | .line_size = 32 |
| 311 | }; |
| 312 | break; |
| 313 | case cpuinfo_uarch_cortex_a15: |
| 314 | /* |
| 315 | * 6.1. About the L1 memory system |
| 316 | * The L1 memory system consists of separate instruction and data caches. |
| 317 | * The L1 instruction memory system has the following features: |
| 318 | * - 32KB 2-way set-associative instruction cache. |
| 319 | * - Fixed line length of 64 bytes. |
| 320 | * The L1 data memory system has the following features: |
| 321 | * - 32KB 2-way set-associative data cache. |
| 322 | * - Fixed line length of 64 bytes. |
| 323 | * 7.1. About the L2 memory system |
| 324 | * The features of the L2 memory system include: |
| 325 | * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. |
| 326 | * - Fixed line length of 64 bytes. |
| 327 | * - 16-way set-associative cache structure. |
| 328 | * |
| 329 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 330 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 331 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 332 | * | Exynos 5 Dual 5250 | 2 | 32K | 32K | 1M | [1] | |
| 333 | * | Exynos 5 Hexa 5260 | 2(+4) | 32K | 32K | 1M(+512K) | [2] | |
| 334 | * | Exynos 5 Octa 5410 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | |
| 335 | * | Exynos 5 Octa 5420 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | |
| 336 | * | Exynos 5 Octa 5422 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | |
| 337 | * | Exynos 5 Octa 5430 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | |
| 338 | * | Exynos 5 Octa 5800 | 4(+4) | 32K | 32K | 2M(+512K) | [3] | |
| 339 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 340 | * |
| 341 | * [1] http://www.arndaleboard.org/wiki/downloads/supports/Exynos_5_Dual_User_Manaul_Public_REV1.00.pdf |
| 342 | * [2] http://www.yicsystem.com/wp-content/uploads/2014/08/Espresso5260P-Guide-Book.pdf |
| 343 | * [3] http://www.anandtech.com/show/6768/samsung-details-exynos-5-octa-architecture-power-at-isscc-13 |
| 344 | */ |
| 345 | *l1i = (struct cpuinfo_cache) { |
| 346 | .size = 32 * 1024, |
| 347 | .associativity = 2, |
| 348 | .line_size = 64 |
| 349 | }; |
| 350 | *l1d = (struct cpuinfo_cache) { |
| 351 | .size = 32 * 1024, |
| 352 | .associativity = 2, |
| 353 | .line_size = 64 |
| 354 | }; |
| 355 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 356 | .size = cluster_cores * 512 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 357 | .associativity = 16, |
| 358 | .line_size = 64 |
| 359 | }; |
| 360 | break; |
Marat Dukhan | 36b969e | 2017-09-10 21:54:32 -0700 | [diff] [blame] | 361 | case cpuinfo_uarch_cortex_a17: |
| 362 | /* |
| 363 | * ARM Cortex-A17 MPCore Processor Technical Reference Manual: |
| 364 | * 6.1. About the L1 memory system |
| 365 | * The L1 memory system consists of separate instruction and data caches. |
| 366 | * The size of the instruction cache is implemented as either 32KB or 64KB. |
| 367 | * The size of the data cache is 32KB. |
| 368 | * |
| 369 | * The L1 instruction cache has the following features: |
| 370 | * - Instruction side cache line length of 64-bytes. |
| 371 | * - 4-way set-associative instruction cache. |
| 372 | * |
| 373 | * The L1 data cache has the following features: |
| 374 | * - Data side cache line length of 64-bytes. |
| 375 | * - 4-way set-associative data cache. |
| 376 | * |
| 377 | * 7.1. About the L2 Memory system |
| 378 | * An integrated L2 cache: |
| 379 | * - The cache size is implemented as either 256KB, 512KB, 1MB, 2MB, 4MB or 8MB. |
| 380 | * - A fixed line length of 64 bytes. |
| 381 | * - 16-way set-associative cache structure. |
| 382 | * |
| 383 | * +------------------+-------+-----------+-----------+-----------+-----------+ |
| 384 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 385 | * +------------------+-------+-----------+-----------+-----------+-----------+ |
| 386 | * | MediaTek MT6595 | 4(+4) | 32K | 32K | 2M(+512K) | [1] | |
| 387 | * +------------------+-------+-----------+-----------+-----------+-----------+ |
| 388 | * |
| 389 | * [1] https://blog.osakana.net/archives/5268 |
| 390 | */ |
| 391 | *l1i = (struct cpuinfo_cache) { |
| 392 | .size = 32 * 1024, |
| 393 | .associativity = 4, |
| 394 | .line_size = 64 |
| 395 | }; |
| 396 | *l1d = (struct cpuinfo_cache) { |
| 397 | .size = 32 * 1024, |
| 398 | .associativity = 4, |
| 399 | .line_size = 64 |
| 400 | }; |
| 401 | *l2 = (struct cpuinfo_cache) { |
| 402 | .size = cluster_cores * 512 * 1024, |
| 403 | .associativity = 16, |
| 404 | .line_size = 64 |
| 405 | }; |
| 406 | break; |
Marat Dukhan | 2b4e2c5 | 2017-10-16 15:01:36 -0700 | [diff] [blame] | 407 | case cpuinfo_uarch_cortex_a35: |
| 408 | /* |
| 409 | * ARM Cortex‑A35 Processor Technical Reference Manual: |
| 410 | * 6.1. About the L1 memory system |
| 411 | * The L1 memory system includes several power-saving and performance-enhancing features. |
| 412 | * These include separate instruction and data caches, which can be configured |
| 413 | * independently during implementation to sizes of 8KB, 16KB, 32KB, or 64KB. |
| 414 | * |
| 415 | * L1 instruction-side memory system |
| 416 | * A dedicated instruction cache that: |
| 417 | * - is virtually indexed and physically tagged. |
| 418 | * - is 2-way set associative. |
| 419 | * - is configurable to be 8KB, 16KB, 32KB, or 64KB. |
| 420 | * - uses a cache line length of 64 bytes. |
| 421 | * |
| 422 | * L1 data-side memory system |
| 423 | * A dedicated data cache that: |
| 424 | * - is physically indexed and physically tagged. |
| 425 | * - is 4-way set associative. |
| 426 | * - is configurable to be 8KB, 16KB, 32KB, or 64KB. |
| 427 | * - uses a cache line length of 64 bytes. |
| 428 | * |
| 429 | * 7.1. About the L2 memory system |
| 430 | * The L2 cache is 8-way set associative. |
| 431 | * Further features of the L2 cache are: |
| 432 | * - Configurable size of 128KB, 256KB, 512KB, and 1MB. |
| 433 | * - Fixed line length of 64 bytes. |
| 434 | * - Physically indexed and tagged. |
| 435 | * |
| 436 | * +-----------------+---------+-----------+-----------+-----------+-----------+ |
| 437 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 438 | * +-----------------+---------+-----------+-----------+-----------+-----------+ |
| 439 | * | MediaTek MT6599 | 4(+4+2) | ? | ? | ? | | |
| 440 | * +-----------------+---------+-----------+-----------+-----------+-----------+ |
| 441 | */ |
| 442 | *l1i = (struct cpuinfo_cache) { |
| 443 | .size = 16 * 1024, /* assumption based on low-end Cortex-A53 */ |
| 444 | .associativity = 2, |
| 445 | .line_size = 64 |
| 446 | }; |
| 447 | *l1d = (struct cpuinfo_cache) { |
| 448 | .size = 16 * 1024, /* assumption based on low-end Cortex-A53 */ |
| 449 | .associativity = 4, |
| 450 | .line_size = 64 |
| 451 | }; |
| 452 | *l2 = (struct cpuinfo_cache) { |
| 453 | .size = 256 * 1024, /* assumption based on low-end Cortex-A53 */ |
| 454 | .associativity = 8, |
| 455 | .line_size = 64 |
| 456 | }; |
| 457 | break; |
Marat Dukhan | ee705c7 | 2017-05-08 10:18:03 +0000 | [diff] [blame] | 458 | case cpuinfo_uarch_cortex_a53: |
| 459 | /* |
| 460 | * ARM Cortex-A53 MPCore Processor Technical Reference Manual: |
| 461 | * 6.1. About the L1 memory system |
Marat Dukhan | c3035ec | 2017-05-08 11:08:49 +0000 | [diff] [blame] | 462 | * The L1 memory system consists of separate instruction and data caches. The implementer configures the |
Marat Dukhan | ee705c7 | 2017-05-08 10:18:03 +0000 | [diff] [blame] | 463 | * instruction and data caches independently during implementation, to sizes of 8KB, 16KB, 32KB, or 64KB. |
| 464 | * |
| 465 | * The L1 Instruction memory system has the following key features: |
| 466 | * - Instruction side cache line length of 64 bytes. |
| 467 | * - 2-way set associative L1 Instruction cache. |
| 468 | * |
| 469 | * The L1 Data memory system has the following features: |
| 470 | * - Data side cache line length of 64 bytes. |
| 471 | * - 4-way set associative L1 Data cache. |
| 472 | * |
| 473 | * 7.1. About the L2 memory system |
| 474 | * The L2 memory system consists of an: |
| 475 | * - Optional tightly-coupled L2 cache that includes: |
| 476 | * - Configurable L2 cache size of 128KB, 256KB, 512KB, 1MB and 2MB. |
| 477 | * - Fixed line length of 64 bytes. |
| 478 | * - 16-way set-associative cache structure. |
| 479 | * |
| 480 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 481 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 482 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 483 | * | Broadcom BCM2837 | 4 | 16K | 16K | 512K | [1] | |
Marat Dukhan | c11876d | 2017-08-10 10:38:01 -0700 | [diff] [blame] | 484 | * | Exynos 7420 | 4(+4) | 32K | 32K | 256K | [2, 3] | |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 485 | * | Exynos 8890 | 4(+4) | 32K | 32K | 256K | [4] | |
Marat Dukhan | c11876d | 2017-08-10 10:38:01 -0700 | [diff] [blame] | 486 | * | Snapdragon 410 | 4 | 32K | 32K | 512K | [3] | |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 487 | * | Snapdragon 835 | 4(+4) | 32K+64K | 32K+64K | 1M(+2M) | sysfs | |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 488 | * | Kirin 620 | 4+4 | 32K | 32K | 512K | [5] | |
Marat Dukhan | ee705c7 | 2017-05-08 10:18:03 +0000 | [diff] [blame] | 489 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 490 | * |
| 491 | * [1] https://www.raspberrypi.org/forums/viewtopic.php?f=91&t=145766 |
Marat Dukhan | c11876d | 2017-08-10 10:38:01 -0700 | [diff] [blame] | 492 | * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2 |
| 493 | * [3] https://www.usenix.org/system/files/conference/usenixsecurity16/sec16_paper_lipp.pdf |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 494 | * [4] http://www.boardset.com/products/products_v8890.php |
| 495 | * [5] http://mirror.lemaker.org/Hi6220V100_Multi-Mode_Application_Processor_Function_Description.pdf |
Marat Dukhan | ee705c7 | 2017-05-08 10:18:03 +0000 | [diff] [blame] | 496 | */ |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 497 | if (midr_is_qualcomm_cortex_a53_silver(midr)) { |
| 498 | /* Qualcomm-modified Cortex-A53 in Snapdragon 630/660/835 */ |
| 499 | |
Marat Dukhan | 7c775ab | 2017-10-15 21:50:11 +0000 | [diff] [blame] | 500 | uint32_t l2_size = 512 * 1024; |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 501 | if (chipset->series == cpuinfo_arm_chipset_series_qualcomm_msm && chipset->model == 8998) { |
| 502 | /* Snapdragon 835 (MSM8998): 1 MB L2 (little cores only) */ |
| 503 | l2_size = 1024 * 1024; |
| 504 | } else if (chipset->series == cpuinfo_arm_chipset_series_qualcomm_snapdragon && chipset->model == 630 && cluster_id == 0) { |
| 505 | /* Snapdragon 630 (SDM630): 1 MB L2 for the big cores */ |
| 506 | l2_size = 1024 * 1024; |
| 507 | } |
| 508 | |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 509 | *l1i = (struct cpuinfo_cache) { |
| 510 | .size = 32 * 1024, |
| 511 | .associativity = 2, |
| 512 | .line_size = 64 |
| 513 | }; |
| 514 | *l1d = (struct cpuinfo_cache) { |
| 515 | .size = 32 * 1024, |
| 516 | .associativity = 4, |
| 517 | .line_size = 64 |
| 518 | }; |
| 519 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 520 | .size = l2_size, |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 521 | .associativity = 16, |
| 522 | .line_size = 64 |
| 523 | }; |
| 524 | } else { |
| 525 | /* Standard Cortex-A53 */ |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 526 | |
| 527 | /* Use conservative values by default */ |
Marat Dukhan | 7c775ab | 2017-10-15 21:50:11 +0000 | [diff] [blame] | 528 | uint32_t l1_size = 16 * 1024; |
| 529 | uint32_t l2_size = 256 * 1024; |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 530 | switch (chipset->series) { |
| 531 | case cpuinfo_arm_chipset_series_qualcomm_msm: |
| 532 | l1_size = 32 * 1024; |
| 533 | l2_size = 512 * 1024; |
| 534 | switch (chipset->model) { |
| 535 | case 8937: /* Snapdragon 430 */ |
| 536 | case 8940: /* Snapdragon 435 */ |
| 537 | case 8953: /* Snapdragon 625 or 626 (8953PRO) */ |
| 538 | if (cluster_id == 0) { |
| 539 | /* 1M L2 for big cluster */ |
| 540 | l2_size = 1024 * 1024; |
| 541 | } |
| 542 | break; |
| 543 | case 8952: /* Snapdragon 617 */ |
| 544 | if (cluster_id != 0) { |
| 545 | /* 256K L2 for LITTLE cluster */ |
| 546 | l2_size = 256 * 1024; |
| 547 | } |
| 548 | break; |
| 549 | default: |
| 550 | /* Silence compiler warning about unhandled enum values */ |
| 551 | break; |
| 552 | } |
| 553 | break; |
| 554 | case cpuinfo_arm_chipset_series_qualcomm_apq: |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 555 | l1_size = 32 * 1024; |
| 556 | l2_size = 512 * 1024; |
| 557 | break; |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 558 | case cpuinfo_arm_chipset_series_qualcomm_snapdragon: |
| 559 | l1_size = 32 * 1024; |
| 560 | l2_size = 512 * 1024; |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 561 | if (chipset->model == 450 && cluster_id == 0) { |
| 562 | /* Snapdragon 450: 1M L2 for big cluster */ |
| 563 | l2_size = 1024 * 1024; |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 564 | } |
| 565 | break; |
| 566 | case cpuinfo_arm_chipset_series_hisilicon_hi: |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 567 | l1_size = 32 * 1024; |
| 568 | l2_size = 512 * 1024; |
| 569 | break; |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 570 | case cpuinfo_arm_chipset_series_hisilicon_kirin: |
| 571 | l1_size = 32 * 1024; |
| 572 | switch (chipset->model) { |
| 573 | case 970: /* Kirin 970 */ |
| 574 | l2_size = 1024 * 1024; |
| 575 | break; |
| 576 | default: |
| 577 | l2_size = 512 * 1024; |
| 578 | break; |
| 579 | } |
| 580 | break; |
Marat Dukhan | 8e6b17a | 2017-09-13 11:50:03 -0700 | [diff] [blame] | 581 | case cpuinfo_arm_chipset_series_samsung_exynos: |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 582 | l1_size = 32 * 1024; |
| 583 | break; |
| 584 | default: |
| 585 | /* Silence compiler warning about unhandled enum values */ |
| 586 | break; |
| 587 | } |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 588 | *l1i = (struct cpuinfo_cache) { |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 589 | .size = l1_size, |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 590 | .associativity = 2, |
| 591 | .line_size = 64 |
| 592 | }; |
| 593 | *l1d = (struct cpuinfo_cache) { |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 594 | .size = l1_size, |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 595 | .associativity = 4, |
| 596 | .line_size = 64 |
| 597 | }; |
| 598 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 599 | .size = l2_size, |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 600 | .associativity = 16, |
| 601 | .line_size = 64 |
| 602 | }; |
| 603 | } |
Marat Dukhan | ee705c7 | 2017-05-08 10:18:03 +0000 | [diff] [blame] | 604 | break; |
Marat Dukhan | c3035ec | 2017-05-08 11:08:49 +0000 | [diff] [blame] | 605 | case cpuinfo_uarch_cortex_a57: |
| 606 | /* |
| 607 | * ARM Cortex-A57 MPCore Processor Technical Reference Manual: |
| 608 | * 6.1. About the L1 memory system |
| 609 | * The L1 memory system consists of separate instruction and data caches. |
| 610 | * |
| 611 | * The L1 instruction memory system has the following features: |
| 612 | * - 48KB 3-way set-associative instruction cache. |
| 613 | * - Fixed line length of 64 bytes. |
| 614 | * |
| 615 | * The L1 data memory system has the following features: |
| 616 | * - 32KB 2-way set-associative data cache. |
| 617 | * - Fixed line length of 64 bytes. |
| 618 | * |
| 619 | * 7.1 About the L2 memory system |
| 620 | * The features of the L2 memory system include: |
| 621 | * - Configurable L2 cache size of 512KB, 1MB, and 2MB. |
| 622 | * - Fixed line length of 64 bytes. |
| 623 | * - 16-way set-associative cache structure. |
| 624 | * - Inclusion property with L1 data caches. |
| 625 | * |
| 626 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 627 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 628 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
Marat Dukhan | c11876d | 2017-08-10 10:38:01 -0700 | [diff] [blame] | 629 | * | Snapdragon 810 | 4(+4) | 32K | 48K | 2M | [1] | |
| 630 | * | Exynos 7420 | 4(+4) | 32K | 48K | 2M | [2] | |
| 631 | * | Jetson TX1 | 4 | 32K | 48K | 2M | [3] | |
Marat Dukhan | c3035ec | 2017-05-08 11:08:49 +0000 | [diff] [blame] | 632 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 633 | * |
Marat Dukhan | c11876d | 2017-08-10 10:38:01 -0700 | [diff] [blame] | 634 | * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview |
| 635 | * [2] http://www.anandtech.com/show/9330/exynos-7420-deep-dive/2 |
| 636 | * [3] https://devblogs.nvidia.com/parallelforall/jetson-tx2-delivers-twice-intelligence-edge/ |
Marat Dukhan | c3035ec | 2017-05-08 11:08:49 +0000 | [diff] [blame] | 637 | */ |
| 638 | *l1i = (struct cpuinfo_cache) { |
| 639 | .size = 48 * 1024, |
| 640 | .associativity = 3, |
| 641 | .line_size = 64 |
| 642 | }; |
| 643 | *l1d = (struct cpuinfo_cache) { |
| 644 | .size = 32 * 1024, |
| 645 | .associativity = 2, |
| 646 | .line_size = 64 |
| 647 | }; |
| 648 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 649 | .size = cluster_cores * 512 * 1024, |
Marat Dukhan | c3035ec | 2017-05-08 11:08:49 +0000 | [diff] [blame] | 650 | .associativity = 16, |
| 651 | .line_size = 64, |
| 652 | .flags = CPUINFO_CACHE_INCLUSIVE |
| 653 | }; |
| 654 | break; |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 655 | case cpuinfo_uarch_cortex_a72: |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 656 | { |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 657 | /* |
Marat Dukhan | 47b34e2 | 2017-08-28 16:42:58 -0700 | [diff] [blame] | 658 | * ARM Cortex-A72 MPCore Processor Technical Reference Manual |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 659 | * 6.1. About the L1 memory system |
| 660 | * The L1 memory system consists of separate instruction and data caches. |
| 661 | * |
| 662 | * The L1 instruction memory system has the following features: |
| 663 | * - 48KB 3-way set-associative instruction cache. |
| 664 | * - Fixed line length of 64 bytes. |
| 665 | * |
| 666 | * The L1 data memory system has the following features: |
| 667 | * - 32KB 2-way set-associative data cache. |
| 668 | * - Fixed cache line length of 64 bytes. |
| 669 | * |
| 670 | * 7.1 About the L2 memory system |
| 671 | * The features of the L2 memory system include: |
| 672 | * - Configurable L2 cache size of 512KB, 1MB, 2MB and 4MB. |
| 673 | * - Fixed line length of 64 bytes. |
| 674 | * - Banked pipeline structures. |
| 675 | * - Inclusion property with L1 data caches. |
| 676 | * - 16-way set-associative cache structure. |
| 677 | * |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 678 | * +---------------------+---------+-----------+-----------+------------+-----------+ |
| 679 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 680 | * +---------------------+---------+-----------+-----------+------------+-----------+ |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 681 | * | Snapdragon 650 | 2(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [1] | |
| 682 | * | Snapdragon 652 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [2] | |
| 683 | * | Snapdragon 653 | 4(+4) | 32K(+32K) | 48K(+32K) | 1M(+512K) | [3] | |
| 684 | * | HiSilicon Kirin 950 | 4(+4) | 32K+32K | 48K+32K | ? | | |
| 685 | * | HiSilicon Kirin 955 | 4(+4) | 32K+32K | 48K+32K | ? | | |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 686 | * | MediaTek Helio X20 | 2(+4+4) | ? | ? | ? | | |
| 687 | * | MediaTek Helio X23 | 2(+4+4) | ? | ? | ? | | |
| 688 | * | MediaTek Helio X25 | 2(+4+4) | ? | ? | ? | | |
| 689 | * | MediaTek Helio X27 | 2(+4+4) | ? | ? | ? | | |
| 690 | * +---------------------+---------+-----------+-----------+------------+-----------+ |
| 691 | * |
| 692 | * [1] http://pdadb.net/index.php?m=processor&id=578&c=qualcomm_snapdragon_618_msm8956__snapdragon_650 |
| 693 | * [2] http://pdadb.net/index.php?m=processor&id=667&c=qualcomm_snapdragon_620_apq8076__snapdragon_652 |
| 694 | * [3] http://pdadb.net/index.php?m=processor&id=692&c=qualcomm_snapdragon_653_msm8976sg__msm8976_pro |
| 695 | */ |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 696 | uint32_t l2_size; |
| 697 | switch (chipset->series) { |
| 698 | case cpuinfo_arm_chipset_series_hisilicon_kirin: |
| 699 | l2_size = 2 * 1024 * 1024; |
| 700 | break; |
| 701 | default: |
| 702 | l2_size = 1024 * 1024; |
| 703 | break; |
| 704 | } |
| 705 | |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 706 | *l1i = (struct cpuinfo_cache) { |
| 707 | .size = 48 * 1024, |
| 708 | .associativity = 3, |
| 709 | .line_size = 64 |
| 710 | }; |
| 711 | *l1d = (struct cpuinfo_cache) { |
| 712 | .size = 32 * 1024, |
| 713 | .associativity = 2, |
| 714 | .line_size = 64 |
| 715 | }; |
| 716 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 717 | .size = l2_size, |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 718 | .associativity = 16, |
| 719 | .line_size = 64, |
| 720 | .flags = CPUINFO_CACHE_INCLUSIVE |
| 721 | }; |
| 722 | break; |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 723 | } |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 724 | case cpuinfo_uarch_cortex_a73: |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 725 | { |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 726 | /* |
| 727 | * ARM Cortex-A73 MPCore Processor Technical Reference Manual |
| 728 | * 6.1. About the L1 memory system |
| 729 | * The L1 memory system consists of separate instruction and data caches. |
| 730 | * The size of the instruction cache is 64KB. |
| 731 | * The size of the data cache is configurable to either 32KB or 64KB. |
| 732 | * |
| 733 | * The L1 instruction memory system has the following key features: |
| 734 | * - Virtually Indexed, Physically Tagged (VIPT), four-way set-associative instruction cache. |
| 735 | * - Fixed cache line length of 64 bytes. |
| 736 | * |
| 737 | * The L1 data memory system has the following features: |
| 738 | * - ...the data cache behaves like an eight-way set associative PIPT cache (for 32KB configurations) |
| 739 | * and a 16-way set associative PIPT cache (for 64KB configurations). |
| 740 | * - Fixed cache line length of 64 bytes. |
| 741 | * |
| 742 | * 7.1 About the L2 memory system |
| 743 | * The L2 memory system consists of: |
| 744 | * - A tightly-integrated L2 cache with: |
| 745 | * - A configurable size of 256KB, 512KB, 1MB, 2MB, 4MB, or 8MB. |
| 746 | * - A 16-way, set-associative structure. |
| 747 | * - A fixed line length of 64 bytes. |
| 748 | * |
| 749 | * The ARM Cortex A73 - Artemis Unveiled [1] |
| 750 | * "ARM still envisions that most vendors will choose to use configurations of 1 to |
| 751 | * 2MB in consumer products. The L2 cache is inclusive of the L1 cache. " |
| 752 | * |
| 753 | * +---------------------+---------+-----------+-----------+-----------+-----------+ |
| 754 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 755 | * +---------------------+---------+-----------+-----------+-----------+-----------+ |
| 756 | * | HiSilicon Kirin 960 | 4(+4) | 64K+32K | 64K+32K | ? | [2] | |
Marat Dukhan | f822e71 | 2017-06-30 20:22:23 -0700 | [diff] [blame] | 757 | * | MediaTek Helio X30 | 2(+4+4) | ? | 64K+ ? | ? | | |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 758 | * | Snapdragon 835 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | sysfs | |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 759 | * | Snapdragon 660 | 4(+4) | 64K+32K | 64K+32K | 2M(+1M) | [3] | |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 760 | * +---------------------+---------+-----------+-----------+-----------+-----------+ |
| 761 | * |
| 762 | * [1] http://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2 |
| 763 | * [2] http://www.anandtech.com/show/11088/hisilicon-kirin-960-performance-and-power/3 |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 764 | * [3] https://arstechnica.com/gadgets/2017/05/qualcomms-snapdragon-660-and-630-bring-more-high-end-features-to-midrange-chips/ |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 765 | */ |
Marat Dukhan | 7c775ab | 2017-10-15 21:50:11 +0000 | [diff] [blame] | 766 | uint32_t l2_size = 1024 * 1024; |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 767 | switch (chipset->series) { |
| 768 | case cpuinfo_arm_chipset_series_hisilicon_kirin: |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 769 | l2_size = 2 * 1024 * 1024; |
| 770 | break; |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 771 | default: |
Marat Dukhan | 0333d90 | 2017-11-20 14:42:23 +0300 | [diff] [blame] | 772 | switch (midr) { |
| 773 | case UINT32_C(0x51AF8001): /* Kryo 280 Gold */ |
| 774 | l2_size = 2 * 1024 * 1024; |
| 775 | break; |
| 776 | case UINT32_C(0x51AF8002): /* Kryo 260 Gold */ |
| 777 | default: |
| 778 | break; |
| 779 | } |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 780 | } |
| 781 | |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 782 | *l1i = (struct cpuinfo_cache) { |
| 783 | .size = 64 * 1024, |
| 784 | .associativity = 4, |
| 785 | .line_size = 64 |
| 786 | }; |
| 787 | *l1d = (struct cpuinfo_cache) { |
| 788 | .size = 64 * 1024, |
| 789 | .associativity = 16, |
| 790 | .line_size = 64 |
| 791 | }; |
| 792 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 793 | .size = l2_size, |
Marat Dukhan | 4780ba5 | 2017-06-30 18:43:01 -0700 | [diff] [blame] | 794 | .associativity = 16, |
| 795 | .line_size = 64, |
| 796 | .flags = CPUINFO_CACHE_INCLUSIVE |
| 797 | }; |
| 798 | break; |
Marat Dukhan | 8eb5e65 | 2017-09-21 23:17:14 -0700 | [diff] [blame] | 799 | } |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 800 | case cpuinfo_uarch_scorpion: |
| 801 | /* |
| 802 | * - "The CPU includes 32KB instruction and data caches as |
| 803 | * well as a complete memory-management unit (MMU) suitable |
| 804 | * for high-level operating systems. The CPU also has |
| 805 | * 256KB of SRAM that can be allocated in 64KB increments |
| 806 | * to level-two (L2) cache or tightly coupled memory (TCM)." [1] |
| 807 | * We interpret it as L2 cache being 4-way set-associative on single-core Scorpion. |
| 808 | * - L1 Data Cache = 32 KB. 32 B/line. [2] |
| 809 | * - L2 Cache = 256 KB. 128 B/line. [2] |
| 810 | * - 256 KB (single-core) or 512 KB (dual-core) L2 cache [3] |
| 811 | * - Single or dual-core configuration [3] |
| 812 | * - For L1 cache assume the same associativity as Krait |
| 813 | * |
| 814 | * [1] https://www.qualcomm.com/media/documents/files/linley-report-on-dual-core-snapdragon.pdf |
| 815 | * [2] http://www.7-cpu.com/cpu/Snapdragon.html |
| 816 | * [3] https://en.wikipedia.org/wiki/Scorpion_(CPU) |
| 817 | */ |
| 818 | *l1i = (struct cpuinfo_cache) { |
| 819 | .size = 32 * 1024, |
| 820 | .associativity = 4, |
| 821 | .line_size = 32 |
| 822 | }; |
| 823 | *l1d = (struct cpuinfo_cache) { |
| 824 | .size = 32 * 1024, |
| 825 | .associativity = 4, |
| 826 | .line_size = 32 |
| 827 | }; |
| 828 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 829 | .size = cluster_cores * 256 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 830 | .associativity = 4, |
| 831 | .line_size = 128 |
| 832 | }; |
| 833 | break; |
| 834 | case cpuinfo_uarch_krait: |
| 835 | /* |
| 836 | * - L0 Data cache = 4 KB. 64 B/line, direct mapped [1] |
| 837 | * - L0 Instruction cache = 4 KB. [1] |
| 838 | * - L1 Data cache = 16 KB. 64 B/line, 4-way [1] |
| 839 | * - L1 Instruction cache = 16 KB, 4-way [1] |
| 840 | * - L2 Cache = 1 MB, 128 B/line, 8-way. Each core has fast access only to 512 KB of L2 cache. [1] |
| 841 | * - L2 = 1MB (dual core) or 2MB (quad core), 8-way set associative [2] |
| 842 | * |
| 843 | * [1] http://www.7-cpu.com/cpu/Krait.html |
| 844 | * [2] http://www.anandtech.com/show/4940/qualcomm-new-snapdragon-s4-msm8960-krait-architecture/2 |
| 845 | */ |
| 846 | *l1i = (struct cpuinfo_cache) { |
| 847 | .size = 16 * 1024, |
| 848 | .associativity = 4, |
| 849 | .line_size = 64 /* assume same as L1D */ |
| 850 | }; |
| 851 | *l1d = (struct cpuinfo_cache) { |
| 852 | .size = 16 * 1024, |
| 853 | .associativity = 4, |
| 854 | .line_size = 64 |
| 855 | }; |
| 856 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 857 | .size = cluster_cores * 512 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 858 | .associativity = 8, |
| 859 | .line_size = 128 |
| 860 | }; |
| 861 | break; |
| 862 | case cpuinfo_uarch_kryo: |
| 863 | /* |
| 864 | * +-----------------+-------+-----------+-----------+-----------+-----------+ |
| 865 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 866 | * +-----------------+-------+-----------+-----------+-----------+-----------+ |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 867 | * | Snapdragon 820 | 2+2 | ? | ? | 1M+512K | [1] | |
| 868 | * | Snapdragon 821 | 2+2 | ? | ? | 1M+512K | [1] | |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 869 | * +-----------------+-------+-----------+-----------+-----------+-----------+ |
| 870 | * |
| 871 | * [1] http://www.anandtech.com/show/9837/snapdragon-820-preview/2 |
| 872 | */ |
| 873 | *l1i = (struct cpuinfo_cache) { |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 874 | .size = 32 * 1024 /* TODO: verify */, |
| 875 | .associativity = 4, |
| 876 | .line_size = 64 |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 877 | }; |
| 878 | *l1d = (struct cpuinfo_cache) { |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 879 | .size = 24 * 1024 /* TODO: verify */, |
| 880 | .associativity = 3, |
| 881 | .line_size = 64 |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 882 | }; |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 883 | if (midr_is_kryo_silver(midr)) { |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 884 | /* Kryo "Silver" */ |
| 885 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 3014efb | 2017-08-25 17:25:55 -0700 | [diff] [blame] | 886 | .size = 512 * 1024, |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 887 | .associativity = 8, |
| 888 | .line_size = 128 |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 889 | }; |
| 890 | } else { |
| 891 | /* Kryo "Gold" */ |
| 892 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | 3014efb | 2017-08-25 17:25:55 -0700 | [diff] [blame] | 893 | .size = 1024 * 1024, |
Marat Dukhan | 54a5b4d | 2017-08-25 12:24:57 -0700 | [diff] [blame] | 894 | .associativity = 8, |
| 895 | .line_size = 128 |
Marat Dukhan | f625402 | 2017-06-30 18:07:11 -0700 | [diff] [blame] | 896 | }; |
| 897 | } |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 898 | break; |
Marat Dukhan | ac57632 | 2017-05-08 13:08:25 +0000 | [diff] [blame] | 899 | case cpuinfo_uarch_denver: |
| 900 | /* |
| 901 | * The Denver chip includes a 128KB, 4-way level 1 instruction cache, a 64KB, 4-way level 1 data cache, |
| 902 | * and a 2MB, 16-way level 2 cache, all of which can service both cores. [1] |
| 903 | * |
| 904 | * All the caches have 64-byte lines. [2] |
| 905 | * |
| 906 | * [1] http://www.pcworld.com/article/2463900/nvidia-reveals-pc-like-performance-for-denver-tegra-k1.html |
| 907 | * [2] http://linleygroup.com/newsletters/newsletter_detail.php?num=5205&year=2014 |
| 908 | */ |
| 909 | *l1i = (struct cpuinfo_cache) { |
| 910 | .size = 128 * 1024, |
| 911 | .associativity = 4, |
| 912 | .line_size = 64 |
| 913 | }; |
| 914 | *l1d = (struct cpuinfo_cache) { |
| 915 | .size = 64 * 1024, |
| 916 | .associativity = 4, |
| 917 | .line_size = 64 |
| 918 | }; |
| 919 | *l2 = (struct cpuinfo_cache) { |
| 920 | .size = 2 * 1024 * 1024, |
| 921 | .associativity = 16, |
| 922 | .line_size = 64 |
| 923 | }; |
| 924 | break; |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 925 | case cpuinfo_uarch_mongoose: |
| 926 | /* |
| 927 | * - "Moving past branch prediction we can see some elements of how the cache is set up for the L1 I$, |
| 928 | * namely 64 KB split into four sets with 128-byte line sizes for 128 cache lines per set" [1] |
| 929 | * - "For loads and stores, a 32 KB, 8-way set associative cache with 64 byte line size is used" [1] |
| 930 | * - "The L2 cache here is 2MB shared across all cores split into 16 sets. This memory is also split |
| 931 | * into 4 banks and has a 22 cycle latency" [1] |
| 932 | * |
| 933 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 934 | * | Processor model | Cores | L1D cache | L1I cache | L2 cache | Reference | |
| 935 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 936 | * | Exynos 8 Octa 8890 | 4(+4) | 32K | 64K | 2M | [1] | |
| 937 | * | Exynos 8 Octa 8895 | 4(+4) | 32K | 64K | 2M | [2] | |
| 938 | * +--------------------+-------+-----------+-----------+-----------+-----------+ |
| 939 | * |
| 940 | * [1] http://www.anandtech.com/show/10590/hot-chips-2016-exynos-m1-architecture-disclosed |
| 941 | * [2] https://www.extremetech.com/mobile/244949-samsungs-exynos-8895-features-custom-cpu-cores-first-10nm-chip-market |
| 942 | */ |
| 943 | *l1i = (struct cpuinfo_cache) { |
| 944 | .size = 64 * 1024, |
| 945 | .associativity = 4, |
| 946 | .line_size = 128 |
| 947 | }; |
| 948 | *l1d = (struct cpuinfo_cache) { |
| 949 | .size = 32 * 1024, |
| 950 | .associativity = 8, |
| 951 | .line_size = 64 |
| 952 | }; |
| 953 | *l2 = (struct cpuinfo_cache) { |
| 954 | .size = 2 * 1024 * 1024, |
| 955 | .associativity = 16, |
| 956 | .line_size = 64 |
| 957 | }; |
| 958 | break; |
Marat Dukhan | 92dae31 | 2017-05-09 14:10:17 +0000 | [diff] [blame] | 959 | case cpuinfo_uarch_thunderx: |
| 960 | /* |
| 961 | * "78K-Icache and 32K-D cache per core, 16 MB shared L2 cache" [1] |
| 962 | * |
| 963 | * [1] https://www.cavium.com/pdfFiles/ThunderX_CP_PB_Rev1.pdf |
| 964 | */ |
| 965 | *l1i = (struct cpuinfo_cache) { |
| 966 | .size = 78 * 1024, |
| 967 | .associativity = 4 /* assumption */, |
| 968 | .line_size = 64 /* assumption */ |
| 969 | }; |
| 970 | *l1d = (struct cpuinfo_cache) { |
| 971 | .size = 32 * 1024, |
| 972 | .associativity = 4 /* assumption */, |
| 973 | .line_size = 64 /* assumption */ |
| 974 | }; |
| 975 | *l2 = (struct cpuinfo_cache) { |
| 976 | .size = 16 * 1024 * 1024, |
| 977 | .associativity = 8 /* assumption */, |
| 978 | .line_size = 64 /* assumption */ |
| 979 | }; |
| 980 | break; |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 981 | case cpuinfo_uarch_cortex_a12: |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 982 | case cpuinfo_uarch_cortex_a32: |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 983 | default: |
| 984 | cpuinfo_log_warning("target uarch not recognized; using generic cache parameters"); |
| 985 | /* Follow OpenBLAS */ |
| 986 | if (arch_version >= 8) { |
| 987 | *l1i = (struct cpuinfo_cache) { |
| 988 | .size = 32 * 1024, |
| 989 | .associativity = 4, |
| 990 | .line_size = 64 |
| 991 | }; |
| 992 | *l1d = (struct cpuinfo_cache) { |
| 993 | .size = 32 * 1024, |
| 994 | .associativity = 4, |
| 995 | .line_size = 64 |
| 996 | }; |
| 997 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 998 | .size = cluster_cores * 256 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 999 | .associativity = 8, |
| 1000 | .line_size = 64 |
| 1001 | }; |
| 1002 | } else { |
| 1003 | *l1i = (struct cpuinfo_cache) { |
| 1004 | .size = 16 * 1024, |
| 1005 | .associativity = 4, |
| 1006 | .line_size = 32 |
| 1007 | }; |
| 1008 | *l1d = (struct cpuinfo_cache) { |
| 1009 | .size = 16 * 1024, |
| 1010 | .associativity = 4, |
| 1011 | .line_size = 32 |
| 1012 | }; |
| 1013 | if (arch_version >= 7) { |
| 1014 | *l2 = (struct cpuinfo_cache) { |
Marat Dukhan | a8fb3dd | 2017-08-09 13:49:39 -0700 | [diff] [blame] | 1015 | .size = cluster_cores * 128 * 1024, |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 1016 | .associativity = 8, |
| 1017 | .line_size = 32 |
| 1018 | }; |
| 1019 | } |
| 1020 | } |
| 1021 | break; |
| 1022 | } |
| 1023 | l1i->sets = l1i->size / (l1i->associativity * l1i->line_size); |
| 1024 | l1i->partitions = 1; |
| 1025 | l1d->sets = l1d->size / (l1d->associativity * l1d->line_size); |
| 1026 | l1d->partitions = 1; |
| 1027 | if (l2->size != 0) { |
Marat Dukhan | 8ecad1a | 2017-05-08 07:21:57 +0000 | [diff] [blame] | 1028 | l2->sets = l2->size / (l2->associativity * l2->line_size); |
Marat Dukhan | 3c98276 | 2017-05-08 06:16:45 +0000 | [diff] [blame] | 1029 | l2->partitions = 1; |
| 1030 | } |
| 1031 | } |