[PATCH] x86_64: Support more than 8 cores on AMD systems

Use physical mode instead of logical mode to address more CPUs.  This is also
used in the CPU hotplug case to avoid a race.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 69b9c25..30c843a 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -31,6 +31,7 @@
 
 extern struct genapic apic_cluster;
 extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
 
 struct genapic *genapic = &apic_flat;
 
@@ -44,12 +45,7 @@
 	u8 clusters, max_cluster;
 	u8 id;
 	u8 cluster_cnt[NUM_APIC_CLUSTERS];
-
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		/* AMD always uses flat mode right now */
-		genapic = &apic_flat;
-		goto print;
-	}
+	int num_cpus = 0;
 
 #if defined(CONFIG_ACPI_BUS)
 	/*
@@ -64,15 +60,34 @@
 #endif
 
 	memset(cluster_cnt, 0, sizeof(cluster_cnt));
-
 	for (i = 0; i < NR_CPUS; i++) {
 		id = bios_cpu_apicid[i];
-		if (id != BAD_APICID)
-			cluster_cnt[APIC_CLUSTERID(id)]++;
+		if (id == BAD_APICID)
+			continue;
+		num_cpus++;
+		cluster_cnt[APIC_CLUSTERID(id)]++;
 	}
 
+	/* Don't use clustered mode on AMD platforms. */
+ 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+		genapic = &apic_physflat;
+#ifndef CONFIG_CPU_HOTPLUG
+		/* In the CPU hotplug case we cannot use broadcast mode
+		   because that opens a race when a CPU is removed.
+		   Stay at physflat mode in this case.
+		   It is bad to do this unconditionally though. Once
+		   we have ACPI platform support for CPU hotplug
+		   we should detect hotplug capablity from ACPI tables and
+		   only do this when really needed. -AK */
+		if (num_cpus <= 8)
+			genapic = &apic_flat;
+#endif
+ 		goto print;
+ 	}
+
 	clusters = 0;
 	max_cluster = 0;
+
 	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
 		if (cluster_cnt[i] > 0) {
 			++clusters;
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index fdfa15f..adc9628 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -2,7 +2,7 @@
  * Copyright 2004 James Cleverdon, IBM.
  * Subject to the GNU Public License, v.2
  *
- * Flat APIC subarch code.  Maximum 8 CPUs, logical delivery.
+ * Flat APIC subarch code.
  *
  * Hacked for x86-64 by James Cleverdon from i386 architecture code by
  * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
@@ -119,3 +119,63 @@
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
 };
+
+/*
+ * Physflat mode is used when there are more than 8 CPUs on a AMD system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+
+static cpumask_t physflat_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+	send_IPI_mask_sequence(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+	cpumask_t allbutme = cpu_online_map;
+	int me = get_cpu();
+	cpu_clear(me, allbutme);
+	physflat_send_IPI_mask(allbutme, vector);
+	put_cpu();
+}
+
+static void physflat_send_IPI_all(int vector)
+{
+	physflat_send_IPI_mask(cpu_online_map, vector);
+}
+
+static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return x86_cpu_to_apicid[cpu];
+	else
+		return BAD_APICID;
+}
+
+struct genapic apic_physflat =  {
+	.name = "physical flat",
+	.int_delivery_mode = dest_LowestPrio,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST,
+	.target_cpus = physflat_target_cpus,
+	.apic_id_registered = flat_apic_id_registered,
+	.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
+	.send_IPI_all = physflat_send_IPI_all,
+	.send_IPI_allbutself = physflat_send_IPI_allbutself,
+	.send_IPI_mask = physflat_send_IPI_mask,
+	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+};