Add cluster support on iOS

commit: 8c2a383b33436428d45f76bd0f4d0fefe44141ff [log] [tgz]
author: Hao Lu <hlu@fb.com> Mon Jul 23 23:12:11 2018 -0700
committer: Marat Dukhan <marat@fb.com> Mon Jul 23 23:12:11 2018 -0700
tree: 7d5cca87823ae1f2b253002cd5e019b752027c8a
parent: 8c621ce3f46e51ac1d1a4d878b9ffc2b5dcac0e3 [diff]
diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c
index beb0294..41beabb 100644
--- a/src/arm/mach/init.c
+++ b/src/arm/mach/init.c

@@ -121,14 +121,14 @@
 		return;
 	}
 	cpuinfo_log_debug("hw.machine: %s", machine_name);
-	
+
 	char name[10];
 	uint32_t major = 0, minor = 0;
 	if (sscanf(machine_name, "%9[^,0123456789]%"SCNu32",%"SCNu32, name, &major, &minor) != 3) {
 		cpuinfo_log_warning("parsing \"hw.machine\" failed: %s", strerror(errno));
 		return;
 	}
-	
+
 	uint32_t chip_model = 0;
 	char suffix = '\0';
 	if (strcmp(name, "iPhone") == 0) {
@@ -231,6 +231,7 @@
 void cpuinfo_arm_mach_init(void) {
 	struct cpuinfo_processor* processors = NULL;
 	struct cpuinfo_core* cores = NULL;
+	struct cpuinfo_cluster* clusters = NULL;
 	struct cpuinfo_package* packages = NULL;
 	struct cpuinfo_cache* l1i = NULL;
 	struct cpuinfo_cache* l1d = NULL;
@@ -260,7 +261,7 @@
 	const uint32_t threads_per_core = mach_topology.threads / mach_topology.cores;
 	const uint32_t threads_per_package = mach_topology.threads / mach_topology.packages;
 	const uint32_t cores_per_package = mach_topology.cores / mach_topology.packages;
-	
+
 	for (uint32_t i = 0; i < mach_topology.packages; i++) {
 		packages[i] = (struct cpuinfo_package) {
 			.processor_start = i * threads_per_package,
@@ -311,6 +312,7 @@
 #endif
 	}
 
+	uint32_t num_clusters = 1;
 	for (uint32_t i = 0; i < mach_topology.cores; i++) {
 		cores[i] = (struct cpuinfo_core) {
 			.processor_start = i * threads_per_core,
@@ -320,6 +322,9 @@
 			.vendor = cpuinfo_vendor_apple,
 			.uarch = decode_uarch(cpu_family, cpu_subtype, i),
 		};
+		if (i != 0 && cores[i].uarch != cores[i - 1].uarch) {
+			num_clusters++;
+		}
 	}
 	for (uint32_t i = 0; i < mach_topology.threads; i++) {
 		const uint32_t smt_id = i % threads_per_core;
@@ -331,6 +336,44 @@
 		processors[i].package = &packages[package_id];
 	}
 
+	clusters = calloc(num_clusters, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+			"failed to allocate %zu bytes for descriptions of %"PRIu32" clusters",
+			num_clusters * sizeof(struct cpuinfo_cluster), num_clusters);
+		goto cleanup;
+	}
+	uint32_t cluster_idx = UINT32_MAX;
+	for (uint32_t i = 0; i < mach_topology.cores; i++) {
+		if (i == 0 || cores[i].uarch != cores[i - 1].uarch) {
+			cluster_idx++;
+			clusters[cluster_idx] = (struct cpuinfo_cluster) {
+				.processor_start = i * threads_per_core,
+				.processor_count = 1,
+				.core_start = i,
+				.core_count = 1,
+				.cluster_id = cluster_idx,
+				.package = cores[i].package,
+				.vendor = cores[i].vendor,
+				.uarch = cores[i].uarch,
+			};
+		} else {
+			clusters[cluster_idx].processor_count++;
+			clusters[cluster_idx].core_count++;
+		}
+		cores[i].cluster = &clusters[cluster_idx];
+	}
+
+	for (uint32_t i = 0; i < mach_topology.threads; i++) {
+		const uint32_t core_id = i / threads_per_core;
+		processors[i].cluster = cores[core_id].cluster;
+	}
+
+	for (uint32_t i = 0; i < mach_topology.packages; i++) {
+		packages[i].cluster_start = 0;
+		packages[i].cluster_count = num_clusters;
+	}
+
 	const uint32_t cacheline_size = get_sys_info(HW_CACHELINE, "HW_CACHELINE");
 	const uint32_t l1d_cache_size = get_sys_info(HW_L1DCACHESIZE, "HW_L1DCACHESIZE");
 	const uint32_t l1i_cache_size = get_sys_info(HW_L1ICACHESIZE, "HW_L1ICACHESIZE");
@@ -357,7 +400,7 @@
 		l2_count = 1;
 		cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
 	}
-	
+
 	uint32_t threads_per_l3 = 0, l3_count = 0;
 	if (l3_cache_size != 0) {
 		/* Assume L3 cache is shared between all cores */
@@ -437,7 +480,7 @@
 			processors[t].cache.l2 = &l2[0];
 		}
 	}
-	
+
 	if (l3_count != 0) {
 		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
 		if (l3 == NULL) {
@@ -470,6 +513,7 @@
 
 	cpuinfo_processors = processors;
 	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
 	cpuinfo_packages = packages;
 
 	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
@@ -479,6 +523,7 @@
 
 	cpuinfo_processors_count = mach_topology.threads;
 	cpuinfo_cores_count = mach_topology.cores;
+	cpuinfo_clusters_count = num_clusters;
 	cpuinfo_packages_count = mach_topology.packages;
 
 	__sync_synchronize();
@@ -487,12 +532,14 @@
 
 	processors = NULL;
 	cores = NULL;
+	clusters = NULL;
 	packages = NULL;
 	l1i = l1d = l2 = l3 = NULL;
 
 cleanup:
 	free(processors);
 	free(cores);
+	free(clusters);
 	free(packages);
 	free(l1i);
 	free(l1d);
commit	8c2a383b33436428d45f76bd0f4d0fefe44141ff	[log] [tgz]
author	Hao Lu <hlu@fb.com>	Mon Jul 23 23:12:11 2018 -0700
committer	Marat Dukhan <marat@fb.com>	Mon Jul 23 23:12:11 2018 -0700
tree	7d5cca87823ae1f2b253002cd5e019b752027c8a
parent	8c621ce3f46e51ac1d1a4d878b9ffc2b5dcac0e3 [diff]