Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 1 | #include <stdio.h> |
| 2 | #include <stdint.h> |
| 3 | #include <stdlib.h> |
| 4 | #include <string.h> |
| 5 | #include <alloca.h> |
| 6 | |
| 7 | #include <errno.h> |
| 8 | #include <sys/types.h> |
| 9 | #include <sys/sysctl.h> |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 10 | #include <mach/machine.h> |
| 11 | |
| 12 | #include <cpuinfo.h> |
| 13 | #include <mach/api.h> |
Marat Dukhan | d0b3760 | 2018-12-09 01:59:26 -0800 | [diff] [blame] | 14 | #include <cpuinfo/internal-api.h> |
| 15 | #include <cpuinfo/log.h> |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 16 | |
| 17 | |
Marat Dukhan | ec86214 | 2017-10-18 17:24:46 -0700 | [diff] [blame] | 18 | struct cpuinfo_arm_isa cpuinfo_isa = { |
| 19 | #if CPUINFO_ARCH_ARM |
| 20 | .thumb = true, |
| 21 | .thumb2 = true, |
| 22 | .thumbee = false, |
| 23 | .jazelle = false, |
| 24 | .armv5e = true, |
| 25 | .armv6 = true, |
| 26 | .armv6k = true, |
| 27 | .armv7 = true, |
| 28 | .vfpv2 = false, |
| 29 | .vfpv3 = true, |
| 30 | .d32 = true, |
| 31 | .wmmx = false, |
| 32 | .wmmx2 = false, |
| 33 | .neon = true, |
| 34 | #endif |
| 35 | #if CPUINFO_ARCH_ARM64 |
| 36 | .aes = true, |
| 37 | .sha1 = true, |
| 38 | .sha2 = true, |
| 39 | .pmull = true, |
| 40 | .crc32 = true, |
| 41 | #endif |
| 42 | }; |
Marat Dukhan | 7b73888 | 2017-10-18 16:59:28 -0700 | [diff] [blame] | 43 | |
Marat Dukhan | 7d52b05 | 2018-03-18 22:57:05 -0700 | [diff] [blame] | 44 | static uint32_t get_sys_info(int type_specifier, const char* name) { |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 45 | size_t size = 0; |
| 46 | uint32_t result = 0; |
| 47 | int mib[2] = { CTL_HW, type_specifier }; |
| 48 | if (sysctl(mib, 2, NULL, &size, NULL, 0) != 0) { |
| 49 | cpuinfo_log_error("sysctl(\"%s\") failed: %s", name, strerror(errno)); |
| 50 | } else if (size == sizeof(uint32_t)) { |
| 51 | sysctl(mib, 2, &result, &size, NULL, 0); |
| 52 | cpuinfo_log_debug("%s: %"PRIu32 ", size = %lu", name, result, size); |
| 53 | } else { |
| 54 | cpuinfo_log_warning("sysctl does not support non-integer lookup for (\"%s\")", name); |
| 55 | } |
| 56 | return result; |
| 57 | } |
| 58 | |
/*
 * Reads a 32-bit integer value through sysctlbyname.
 *
 * type_specifier - name of the sysctl value (e.g. "hw.cpufamily"); also used in log messages.
 *
 * Returns the value reported by the kernel, or 0 if either sysctlbyname call fails
 * or the value is not exactly sizeof(uint32_t) bytes wide. The return type is wider
 * than the value that is read: only 32-bit quantities are ever returned.
 */
static uint64_t get_sys_info_by_name(const char* type_specifier) {
	size_t size = 0;
	uint32_t result = 0;
	/* A call with a NULL output buffer only reports the size of the value */
	if (sysctlbyname(type_specifier, NULL, &size, NULL, 0) != 0) {
		cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
	} else if (size == sizeof(uint32_t)) {
		if (sysctlbyname(type_specifier, &result, &size, NULL, 0) != 0) {
			cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", type_specifier, strerror(errno));
			/* Do not hand a partially written value back to the caller */
			result = 0;
		} else {
			cpuinfo_log_debug("%s: %"PRIu32 ", size = %zu", type_specifier, result, size);
		}
	} else {
		cpuinfo_log_warning("sysctl does not support non-integer lookup for (\"%s\")", type_specifier);
	}
	return result;
}
| 72 | |
Marat Dukhan | 44dafc5 | 2018-12-26 20:19:15 -0800 | [diff] [blame] | 73 | static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t cpu_subtype, uint32_t core_index, uint32_t core_count) { |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 74 | switch (cpu_family) { |
| 75 | case CPUFAMILY_ARM_SWIFT: |
| 76 | return cpuinfo_uarch_swift; |
| 77 | case CPUFAMILY_ARM_CYCLONE: |
| 78 | return cpuinfo_uarch_cyclone; |
| 79 | case CPUFAMILY_ARM_TYPHOON: |
| 80 | return cpuinfo_uarch_typhoon; |
| 81 | case CPUFAMILY_ARM_TWISTER: |
| 82 | return cpuinfo_uarch_twister; |
| 83 | case CPUFAMILY_ARM_HURRICANE: |
| 84 | return cpuinfo_uarch_hurricane; |
| 85 | case CPUFAMILY_ARM_MONSOON_MISTRAL: |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 86 | /* 2x Monsoon + 4x Mistral cores */ |
| 87 | return core_index < 2 ? cpuinfo_uarch_monsoon : cpuinfo_uarch_mistral; |
Marat Dukhan | 44dafc5 | 2018-12-26 20:19:15 -0800 | [diff] [blame] | 88 | #ifdef CPUFAMILY_ARM_VORTEX_TEMPEST |
| 89 | case CPUFAMILY_ARM_VORTEX_TEMPEST: |
| 90 | #else |
| 91 | case 0xe81e7ef6: |
| 92 | /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ |
| 93 | #endif |
| 94 | /* Hexa-core: 2x Vortex + 4x Tempest; Octa-core: 4x Cortex + 4x Tempest */ |
| 95 | return core_index + 4 < core_count ? cpuinfo_uarch_vortex : cpuinfo_uarch_tempest; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 96 | default: |
| 97 | /* Use hw.cpusubtype for detection */ |
| 98 | break; |
| 99 | } |
| 100 | |
| 101 | switch (cpu_subtype) { |
| 102 | case CPU_SUBTYPE_ARM_V7: |
| 103 | return cpuinfo_uarch_cortex_a8; |
| 104 | case CPU_SUBTYPE_ARM_V7F: |
| 105 | return cpuinfo_uarch_cortex_a9; |
| 106 | case CPU_SUBTYPE_ARM_V7K: |
| 107 | return cpuinfo_uarch_cortex_a7; |
| 108 | default: |
| 109 | return cpuinfo_uarch_unknown; |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | static void decode_package_name(char* package_name) { |
| 114 | size_t size; |
| 115 | if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) { |
| 116 | cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno)); |
| 117 | return; |
| 118 | } |
| 119 | |
| 120 | char *machine_name = alloca(size); |
| 121 | if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) { |
| 122 | cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno)); |
| 123 | return; |
| 124 | } |
| 125 | cpuinfo_log_debug("hw.machine: %s", machine_name); |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 126 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 127 | char name[10]; |
| 128 | uint32_t major = 0, minor = 0; |
| 129 | if (sscanf(machine_name, "%9[^,0123456789]%"SCNu32",%"SCNu32, name, &major, &minor) != 3) { |
| 130 | cpuinfo_log_warning("parsing \"hw.machine\" failed: %s", strerror(errno)); |
| 131 | return; |
| 132 | } |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 133 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 134 | uint32_t chip_model = 0; |
| 135 | char suffix = '\0'; |
| 136 | if (strcmp(name, "iPhone") == 0) { |
| 137 | /* |
| 138 | * iPhone 4 and up are supported: |
| 139 | * - iPhone 4 [A4]: iPhone3,1, iPhone3,2, iPhone3,3 |
| 140 | * - iPhone 4S [A5]: iPhone4,1 |
| 141 | * - iPhone 5 [A6]: iPhone5,1, iPhone5,2 |
| 142 | * - iPhone 5c [A6]: iPhone5,3, iPhone5,4 |
| 143 | * - iPhone 5s [A7]: iPhone6,1, iPhone6,2 |
| 144 | * - iPhone 6 [A8]: iPhone7,2 |
| 145 | * - iPhone 6 Plus [A8]: iPhone7,1 |
| 146 | * - iPhone 6s [A9]: iPhone8,1 |
| 147 | * - iPhone 6s Plus [A9]: iPhone8,2 |
| 148 | * - iPhone SE [A9]: iPhone8,4 |
| 149 | * - iPhone 7 [A10]: iPhone9,1, iPhone9,3 |
| 150 | * - iPhone 7 Plus [A10]: iPhone9,2, iPhone9,4 |
| 151 | * - iPhone 8 [A11]: iPhone10,1, iPhone10,4 |
| 152 | * - iPhone 8 Plus [A11]: iPhone10,2, iPhone10,5 |
| 153 | * - iPhone X [A11]: iPhone10,3, iPhone10,6 |
Marat Dukhan | fd54c3d | 2018-09-30 22:21:20 -0700 | [diff] [blame] | 154 | * - iPhone XS [A12]: iPhone11,2, |
| 155 | * - iPhone XS Max [A12]: iPhone11,4, iPhone11,6 |
| 156 | * - iPhone XR [A12]: iPhone11,8 |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 157 | */ |
| 158 | chip_model = major + 1; |
| 159 | } else if (strcmp(name, "iPad") == 0) { |
| 160 | switch (major) { |
| 161 | /* iPad 2 and up are supported */ |
| 162 | case 2: |
| 163 | /* |
| 164 | * iPad 2 [A5]: iPad2,1, iPad2,2, iPad2,3, iPad2,4 |
| 165 | * iPad mini [A5]: iPad2,5, iPad2,6, iPad2,7 |
| 166 | */ |
| 167 | chip_model = major + 3; |
| 168 | break; |
| 169 | case 3: |
| 170 | /* |
| 171 | * iPad 3rd Gen [A5X]: iPad3,1, iPad3,2, iPad3,3 |
| 172 | * iPad 4th Gen [A6X]: iPad3,4, iPad3,5, iPad3,6 |
| 173 | */ |
| 174 | chip_model = (minor <= 3) ? 5 : 6; |
| 175 | suffix = 'X'; |
| 176 | break; |
| 177 | case 4: |
| 178 | /* |
| 179 | * iPad Air [A7]: iPad4,1, iPad4,2, iPad4,3 |
| 180 | * iPad mini Retina [A7]: iPad4,4, iPad4,5, iPad4,6 |
| 181 | * iPad mini 3 [A7]: iPad4,7, iPad4,8, iPad4,9 |
| 182 | */ |
| 183 | chip_model = major + 3; |
| 184 | break; |
| 185 | case 5: |
| 186 | /* |
| 187 | * iPad mini 4 [A8]: iPad5,1, iPad5,2 |
| 188 | * iPad Air 2 [A8X]: iPad5,3, iPad5,4 |
| 189 | */ |
| 190 | chip_model = major + 3; |
| 191 | suffix = (minor <= 2) ? '\0' : 'X'; |
| 192 | break; |
| 193 | case 6: |
| 194 | /* |
| 195 | * iPad Pro 9.7" [A9X]: iPad6,3, iPad6,4 |
| 196 | * iPad Pro [A9X]: iPad6,7, iPad6,8 |
| 197 | * iPad 5th Gen [A9]: iPad6,11, iPad6,12 |
| 198 | */ |
| 199 | chip_model = major + 3; |
| 200 | suffix = minor <= 8 ? 'X' : '\0'; |
| 201 | break; |
| 202 | case 7: |
| 203 | /* |
| 204 | * iPad Pro 12.9" [A10X]: iPad7,1, iPad7,2 |
| 205 | * iPad Pro 10.5" [A10X]: iPad7,3, iPad7,4 |
Marat Dukhan | fd54c3d | 2018-09-30 22:21:20 -0700 | [diff] [blame] | 206 | * iPad 6th Gen [A10]: iPad7,5, iPad7,6 |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 207 | */ |
| 208 | chip_model = major + 3; |
Marat Dukhan | fd54c3d | 2018-09-30 22:21:20 -0700 | [diff] [blame] | 209 | suffix = minor <= 4 ? 'X' : '\0'; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 210 | break; |
| 211 | default: |
| 212 | cpuinfo_log_info("unknown iPad: %s", machine_name); |
| 213 | break; |
| 214 | } |
| 215 | } else if (strcmp(name, "iPod") == 0) { |
| 216 | switch (major) { |
| 217 | case 5: |
| 218 | chip_model = 5; |
| 219 | break; |
| 220 | /* iPod touch (5th Gen) [A5]: iPod5,1 */ |
| 221 | case 7: |
| 222 | /* iPod touch (6th Gen, 2015) [A8]: iPod7,1 */ |
| 223 | chip_model = 8; |
| 224 | break; |
| 225 | default: |
| 226 | cpuinfo_log_info("unknown iPod: %s", machine_name); |
| 227 | break; |
| 228 | } |
| 229 | } else { |
| 230 | cpuinfo_log_info("unknown device: %s", machine_name); |
| 231 | } |
| 232 | if (chip_model != 0) { |
| 233 | snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%"PRIu32"%c", chip_model, suffix); |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | void cpuinfo_arm_mach_init(void) { |
| 238 | struct cpuinfo_processor* processors = NULL; |
| 239 | struct cpuinfo_core* cores = NULL; |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 240 | struct cpuinfo_cluster* clusters = NULL; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 241 | struct cpuinfo_package* packages = NULL; |
| 242 | struct cpuinfo_cache* l1i = NULL; |
| 243 | struct cpuinfo_cache* l1d = NULL; |
| 244 | struct cpuinfo_cache* l2 = NULL; |
| 245 | struct cpuinfo_cache* l3 = NULL; |
| 246 | |
| 247 | struct cpuinfo_mach_topology mach_topology = cpuinfo_mach_detect_topology(); |
| 248 | processors = calloc(mach_topology.threads, sizeof(struct cpuinfo_processor)); |
| 249 | if (processors == NULL) { |
| 250 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors", |
| 251 | mach_topology.threads * sizeof(struct cpuinfo_processor), mach_topology.threads); |
| 252 | goto cleanup; |
| 253 | } |
| 254 | cores = calloc(mach_topology.cores, sizeof(struct cpuinfo_core)); |
| 255 | if (cores == NULL) { |
| 256 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores", |
| 257 | mach_topology.cores * sizeof(struct cpuinfo_core), mach_topology.cores); |
| 258 | goto cleanup; |
| 259 | } |
| 260 | packages = calloc(mach_topology.packages, sizeof(struct cpuinfo_package)); |
| 261 | if (packages == NULL) { |
| 262 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" packages", |
| 263 | mach_topology.packages * sizeof(struct cpuinfo_package), mach_topology.packages); |
| 264 | goto cleanup; |
| 265 | } |
| 266 | |
| 267 | const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores; |
| 268 | const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages; |
| 269 | const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages; |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 270 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 271 | for (uint32_t i = 0; i < mach_topology.packages; i++) { |
| 272 | packages[i] = (struct cpuinfo_package) { |
| 273 | .processor_start = i * threads_per_package, |
| 274 | .processor_count = threads_per_package, |
| 275 | .core_start = i * cores_per_package, |
| 276 | .core_count = cores_per_package, |
| 277 | }; |
| 278 | decode_package_name(packages[i].name); |
| 279 | } |
Marat Dukhan | ec86214 | 2017-10-18 17:24:46 -0700 | [diff] [blame] | 280 | |
| 281 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 282 | const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); |
Marat Dukhan | ec86214 | 2017-10-18 17:24:46 -0700 | [diff] [blame] | 283 | const uint32_t cpu_type = get_sys_info_by_name("hw.cputype"); |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 284 | const uint32_t cpu_subtype = get_sys_info_by_name("hw.cpusubtype"); |
Marat Dukhan | ec86214 | 2017-10-18 17:24:46 -0700 | [diff] [blame] | 285 | switch (cpu_type) { |
| 286 | case CPU_TYPE_ARM64: |
| 287 | cpuinfo_isa.aes = true; |
| 288 | cpuinfo_isa.sha1 = true; |
| 289 | cpuinfo_isa.sha2 = true; |
| 290 | cpuinfo_isa.pmull = true; |
| 291 | cpuinfo_isa.crc32 = true; |
| 292 | break; |
| 293 | #if CPUINFO_ARCH_ARM |
| 294 | case CPU_TYPE_ARM: |
| 295 | switch (cpu_subtype) { |
| 296 | case CPU_SUBTYPE_ARM_V8: |
| 297 | cpuinfo_isa.aes = true; |
| 298 | cpuinfo_isa.sha1 = true; |
| 299 | cpuinfo_isa.sha2 = true; |
| 300 | cpuinfo_isa.pmull = true; |
| 301 | cpuinfo_isa.crc32 = true; |
| 302 | /* Fall-through to add ARMv7S features */ |
| 303 | case CPU_SUBTYPE_ARM_V7S: |
| 304 | case CPU_SUBTYPE_ARM_V7K: |
| 305 | cpuinfo_isa.fma = true; |
| 306 | /* Fall-through to add ARMv7F features */ |
| 307 | case CPU_SUBTYPE_ARM_V7F: |
| 308 | cpuinfo_isa.armv7mp = true; |
| 309 | cpuinfo_isa.fp16 = true; |
| 310 | /* Fall-through to add ARMv7 features */ |
| 311 | case CPU_SUBTYPE_ARM_V7: |
| 312 | break; |
| 313 | default: |
| 314 | break; |
| 315 | } |
| 316 | break; |
| 317 | #endif |
| 318 | } |
Marat Dukhan | c82f5e3 | 2018-12-26 20:20:23 -0800 | [diff] [blame^] | 319 | /* |
| 320 | * Support for ARMv8.1 Atomics & FP16 arithmetic instructions is supposed to be detected via |
| 321 | * sysctlbyname calls with "hw.optional.armv8_1_atomics" and "hw.optional.neon_fp16" arguments |
| 322 | * (see https://devstreaming-cdn.apple.com/videos/wwdc/2018/409t8zw7rumablsh/409/409_whats_new_in_llvm.pdf), |
| 323 | * but on new iOS versions these calls just fail with EPERM. |
| 324 | * |
| 325 | * Thus, we whitelist CPUs known to support these instructions. |
| 326 | */ |
| 327 | switch (cpu_family) { |
| 328 | case CPUFAMILY_ARM_MONSOON_MISTRAL: |
| 329 | #ifdef CPUFAMILY_ARM_VORTEX_TEMPEST |
| 330 | case CPUFAMILY_ARM_VORTEX_TEMPEST: |
| 331 | #else |
| 332 | case 0xe81e7ef6: |
| 333 | /* Hard-coded value for older SDKs which do not define CPUFAMILY_ARM_VORTEX_TEMPEST */ |
| 334 | #endif |
| 335 | cpuinfo_isa.atomics = true; |
| 336 | cpuinfo_isa.fp16arith = true; |
| 337 | } |
Marat Dukhan | ec86214 | 2017-10-18 17:24:46 -0700 | [diff] [blame] | 338 | |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 339 | uint32_t num_clusters = 1; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 340 | for (uint32_t i = 0; i < mach_topology.cores; i++) { |
| 341 | cores[i] = (struct cpuinfo_core) { |
| 342 | .processor_start = i * threads_per_core, |
| 343 | .processor_count = threads_per_core, |
| 344 | .core_id = i % cores_per_package, |
| 345 | .package = packages + i / cores_per_package, |
| 346 | .vendor = cpuinfo_vendor_apple, |
Marat Dukhan | 44dafc5 | 2018-12-26 20:19:15 -0800 | [diff] [blame] | 347 | .uarch = decode_uarch(cpu_family, cpu_subtype, i, mach_topology.cores), |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 348 | }; |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 349 | if (i != 0 && cores[i].uarch != cores[i - 1].uarch) { |
| 350 | num_clusters++; |
| 351 | } |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 352 | } |
| 353 | for (uint32_t i = 0; i < mach_topology.threads; i++) { |
| 354 | const uint32_t smt_id = i % threads_per_core; |
| 355 | const uint32_t core_id = i / threads_per_core; |
| 356 | const uint32_t package_id = i / threads_per_package; |
| 357 | |
| 358 | processors[i].smt_id = smt_id; |
Marat Dukhan | 7fcd441 | 2017-11-30 09:46:49 -0800 | [diff] [blame] | 359 | processors[i].core = &cores[core_id]; |
| 360 | processors[i].package = &packages[package_id]; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 361 | } |
| 362 | |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 363 | clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster)); |
| 364 | if (clusters == NULL) { |
| 365 | cpuinfo_log_error( |
| 366 | "failed to allocate %zu bytes for descriptions of %"PRIu32" clusters", |
| 367 | num_clusters * sizeof(struct cpuinfo_cluster), num_clusters); |
| 368 | goto cleanup; |
| 369 | } |
| 370 | uint32_t cluster_idx = UINT32_MAX; |
| 371 | for (uint32_t i = 0; i < mach_topology.cores; i++) { |
| 372 | if (i == 0 || cores[i].uarch != cores[i - 1].uarch) { |
| 373 | cluster_idx++; |
| 374 | clusters[cluster_idx] = (struct cpuinfo_cluster) { |
| 375 | .processor_start = i * threads_per_core, |
| 376 | .processor_count = 1, |
| 377 | .core_start = i, |
| 378 | .core_count = 1, |
| 379 | .cluster_id = cluster_idx, |
| 380 | .package = cores[i].package, |
| 381 | .vendor = cores[i].vendor, |
| 382 | .uarch = cores[i].uarch, |
| 383 | }; |
| 384 | } else { |
| 385 | clusters[cluster_idx].processor_count++; |
| 386 | clusters[cluster_idx].core_count++; |
| 387 | } |
| 388 | cores[i].cluster = &clusters[cluster_idx]; |
| 389 | } |
| 390 | |
| 391 | for (uint32_t i = 0; i < mach_topology.threads; i++) { |
| 392 | const uint32_t core_id = i / threads_per_core; |
| 393 | processors[i].cluster = cores[core_id].cluster; |
| 394 | } |
| 395 | |
| 396 | for (uint32_t i = 0; i < mach_topology.packages; i++) { |
| 397 | packages[i].cluster_start = 0; |
| 398 | packages[i].cluster_count = num_clusters; |
| 399 | } |
| 400 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 401 | const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE"); |
| 402 | const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE"); |
| 403 | const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE"); |
| 404 | const uint32_t l2_cache_size = get_sys_info(HW_L2CACHESIZE, "HW_L2CACHESIZE"); |
| 405 | const uint32_t l3_cache_size = get_sys_info(HW_L3CACHESIZE, "HW_L3CACHESIZE"); |
| 406 | const uint32_t l1_cache_associativity = 4; |
| 407 | const uint32_t l2_cache_associativity = 8; |
| 408 | const uint32_t l3_cache_associativity = 16; |
| 409 | const uint32_t cache_partitions = 1; |
| 410 | const uint32_t cache_flags = 0; |
| 411 | |
| 412 | uint32_t threads_per_l1 = 0, l1_count = 0; |
| 413 | if (l1i_cache_size != 0 || l1d_cache_size != 0) { |
Hao Lu | 3617d5b | 2017-10-23 15:16:50 -0700 | [diff] [blame] | 414 | /* Assume L1 caches are private to each core */ |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 415 | threads_per_l1 = 1; |
| 416 | l1_count = mach_topology.threads / threads_per_l1; |
| 417 | cpuinfo_log_debug("detected %"PRIu32" L1 caches", l1_count); |
| 418 | } |
| 419 | |
| 420 | uint32_t threads_per_l2 = 0, l2_count = 0; |
| 421 | if (l2_cache_size != 0) { |
Hao Lu | 3617d5b | 2017-10-23 15:16:50 -0700 | [diff] [blame] | 422 | /* Assume L2 cache is shared between all cores */ |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 423 | threads_per_l2 = mach_topology.cores; |
| 424 | l2_count = 1; |
| 425 | cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count); |
| 426 | } |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 427 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 428 | uint32_t threads_per_l3 = 0, l3_count = 0; |
| 429 | if (l3_cache_size != 0) { |
Hao Lu | 3617d5b | 2017-10-23 15:16:50 -0700 | [diff] [blame] | 430 | /* Assume L3 cache is shared between all cores */ |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 431 | threads_per_l3 = mach_topology.cores; |
| 432 | l3_count = 1; |
| 433 | cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count); |
| 434 | } |
| 435 | |
| 436 | if (l1i_cache_size != 0) { |
| 437 | l1i = calloc(l1_count, sizeof(struct cpuinfo_cache)); |
| 438 | if (l1i == NULL) { |
| 439 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches", |
| 440 | l1_count * sizeof(struct cpuinfo_cache), l1_count); |
| 441 | goto cleanup; |
| 442 | } |
| 443 | for (uint32_t c = 0; c < l1_count; c++) { |
| 444 | l1i[c] = (struct cpuinfo_cache) { |
| 445 | .size = l1i_cache_size, |
| 446 | .associativity = l1_cache_associativity, |
| 447 | .sets = l1i_cache_size / (l1_cache_associativity * cacheline_size), |
| 448 | .partitions = cache_partitions, |
| 449 | .line_size = cacheline_size, |
| 450 | .flags = cache_flags, |
| 451 | .processor_start = c * threads_per_l1, |
| 452 | .processor_count = threads_per_l1, |
| 453 | }; |
| 454 | } |
| 455 | for (uint32_t t = 0; t < mach_topology.threads; t++) { |
| 456 | processors[t].cache.l1i = &l1i[t / threads_per_l1]; |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | if (l1d_cache_size != 0) { |
| 461 | l1d = calloc(l1_count, sizeof(struct cpuinfo_cache)); |
| 462 | if (l1d == NULL) { |
| 463 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches", |
| 464 | l1_count * sizeof(struct cpuinfo_cache), l1_count); |
| 465 | goto cleanup; |
| 466 | } |
| 467 | for (uint32_t c = 0; c < l1_count; c++) { |
| 468 | l1d[c] = (struct cpuinfo_cache) { |
| 469 | .size = l1d_cache_size, |
| 470 | .associativity = l1_cache_associativity, |
| 471 | .sets = l1d_cache_size / (l1_cache_associativity * cacheline_size), |
| 472 | .partitions = cache_partitions, |
| 473 | .line_size = cacheline_size, |
| 474 | .flags = cache_flags, |
| 475 | .processor_start = c * threads_per_l1, |
| 476 | .processor_count = threads_per_l1, |
| 477 | }; |
| 478 | } |
| 479 | for (uint32_t t = 0; t < mach_topology.threads; t++) { |
| 480 | processors[t].cache.l1d = &l1d[t / threads_per_l1]; |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | if (l2_count != 0) { |
| 485 | l2 = calloc(l2_count, sizeof(struct cpuinfo_cache)); |
| 486 | if (l2 == NULL) { |
| 487 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches", |
| 488 | l2_count * sizeof(struct cpuinfo_cache), l2_count); |
| 489 | goto cleanup; |
| 490 | } |
| 491 | for (uint32_t c = 0; c < l2_count; c++) { |
| 492 | l2[c] = (struct cpuinfo_cache) { |
| 493 | .size = l2_cache_size, |
| 494 | .associativity = l2_cache_associativity, |
| 495 | .sets = l2_cache_size / (l2_cache_associativity * cacheline_size), |
| 496 | .partitions = cache_partitions, |
| 497 | .line_size = cacheline_size, |
| 498 | .flags = cache_flags, |
| 499 | .processor_start = c * threads_per_l2, |
| 500 | .processor_count = threads_per_l2, |
| 501 | }; |
| 502 | } |
| 503 | for (uint32_t t = 0; t < mach_topology.threads; t++) { |
| 504 | processors[t].cache.l2 = &l2[0]; |
| 505 | } |
| 506 | } |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 507 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 508 | if (l3_count != 0) { |
| 509 | l3 = calloc(l3_count, sizeof(struct cpuinfo_cache)); |
| 510 | if (l3 == NULL) { |
| 511 | cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches", |
| 512 | l3_count * sizeof(struct cpuinfo_cache), l3_count); |
| 513 | goto cleanup; |
| 514 | } |
| 515 | for (uint32_t c = 0; c < l3_count; c++) { |
| 516 | l3[c] = (struct cpuinfo_cache) { |
| 517 | .size = l3_cache_size, |
| 518 | .associativity = l3_cache_associativity, |
| 519 | .sets = l3_cache_size / (l3_cache_associativity * cacheline_size), |
| 520 | .partitions = cache_partitions, |
| 521 | .line_size = cacheline_size, |
| 522 | .flags = cache_flags, |
| 523 | .processor_start = c * threads_per_l3, |
| 524 | .processor_count = threads_per_l3, |
| 525 | }; |
| 526 | } |
| 527 | for (uint32_t t = 0; t < mach_topology.threads; t++) { |
| 528 | processors[t].cache.l3 = &l3[0]; |
| 529 | } |
| 530 | } |
| 531 | |
| 532 | /* Commit changes */ |
| 533 | cpuinfo_cache[cpuinfo_cache_level_1i] = l1i; |
| 534 | cpuinfo_cache[cpuinfo_cache_level_1d] = l1d; |
| 535 | cpuinfo_cache[cpuinfo_cache_level_2] = l2; |
| 536 | cpuinfo_cache[cpuinfo_cache_level_3] = l3; |
| 537 | |
| 538 | cpuinfo_processors = processors; |
| 539 | cpuinfo_cores = cores; |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 540 | cpuinfo_clusters = clusters; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 541 | cpuinfo_packages = packages; |
| 542 | |
| 543 | cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count; |
| 544 | cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count; |
| 545 | cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count; |
| 546 | cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count; |
| 547 | |
| 548 | cpuinfo_processors_count = mach_topology.threads; |
| 549 | cpuinfo_cores_count = mach_topology.cores; |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 550 | cpuinfo_clusters_count = num_clusters; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 551 | cpuinfo_packages_count = mach_topology.packages; |
| 552 | |
Marat Dukhan | cf70aee | 2018-03-24 23:21:02 -0700 | [diff] [blame] | 553 | __sync_synchronize(); |
| 554 | |
| 555 | cpuinfo_is_initialized = true; |
| 556 | |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 557 | processors = NULL; |
| 558 | cores = NULL; |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 559 | clusters = NULL; |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 560 | packages = NULL; |
| 561 | l1i = l1d = l2 = l3 = NULL; |
| 562 | |
| 563 | cleanup: |
| 564 | free(processors); |
| 565 | free(cores); |
Hao Lu | 8c2a383 | 2018-07-23 23:12:11 -0700 | [diff] [blame] | 566 | free(clusters); |
Hao Lu | 922070c | 2017-10-18 16:29:02 -0700 | [diff] [blame] | 567 | free(packages); |
| 568 | free(l1i); |
| 569 | free(l1d); |
| 570 | free(l2); |
| 571 | free(l3); |
| 572 | } |