xen: SMP guest support

This is a fairly straightforward Xen implementation of smp_ops.

Xen has its own IPI mechanisms, and has no dependency on any
APIC-based IPI. The smp_ops hooks and the flush_tlb_others pv_op
allow a Xen guest to avoid all APIC code in arch/i386 (the only apic
operation is a single apic_read for the apic version number).

One subtle point which needs to be addressed is unpinning pagetables
when another cpu may have a lazy tlb reference to the pagetable. Xen
will not allow an in-use pagetable to be unpinned, so we must find any
other cpus with a reference to the pagetable and get them to shoot
down their references.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andi Kleen <ak@suse.de>

commit: f87e4cac4f4e940b328d3deb5b53e642e3881f43 [log] [tgz]
author: Jeremy Fitzhardinge <jeremy@xensource.com> Tue Jul 17 18:37:06 2007 -0700
committer: Jeremy Fitzhardinge <jeremy@goop.org> Wed Jul 18 08:47:44 2007 -0700
tree: 7409f86561e5f97459378abd2ae21e9a5c82bfea
parent: ab55028886dd1dd54585f22bf19a00eb23869340 [diff] [blame]
diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c
index 53501ce..bc49ef8 100644
--- a/arch/i386/xen/mmu.c
+++ b/arch/i386/xen/mmu.c

@@ -391,8 +391,12 @@
 
 	xen_mc_batch();
 
-	if (pgd_walk(pgd, pin_page, TASK_SIZE))
+	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
+		/* re-enable interrupts for kmap_flush_unused */
+		xen_mc_issue(0);
 		kmap_flush_unused();
+		xen_mc_batch();
+	}
 
 	mcs = __xen_mc_entry(sizeof(*op));
 	op = mcs.args;
@@ -474,27 +478,58 @@
 	spin_unlock(&mm->page_table_lock);
 }
 
-void xen_exit_mmap(struct mm_struct *mm)
+
+#ifdef CONFIG_SMP
+/* Another cpu may still have their %cr3 pointing at the pagetable, so
+   we need to repoint it somewhere else before we can unpin it. */
+static void drop_other_mm_ref(void *info)
 {
-	struct task_struct *tsk = current;
+	struct mm_struct *mm = info;
 
-	task_lock(tsk);
+	if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
+		leave_mm(smp_processor_id());
+}
 
-	/*
-	 * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
-	 * *much* faster this way, as no tlb flushes means bigger wrpt batches.
-	 */
-	if (tsk->active_mm == mm) {
-		tsk->active_mm = &init_mm;
-		atomic_inc(&init_mm.mm_count);
-
-		switch_mm(mm, &init_mm, tsk);
-
-		atomic_dec(&mm->mm_count);
-		BUG_ON(atomic_read(&mm->mm_count) == 0);
+static void drop_mm_ref(struct mm_struct *mm)
+{
+	if (current->active_mm == mm) {
+		if (current->mm == mm)
+			load_cr3(swapper_pg_dir);
+		else
+			leave_mm(smp_processor_id());
 	}
 
-	task_unlock(tsk);
+	if (!cpus_empty(mm->cpu_vm_mask))
+		xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref,
+					   mm, 1);
+}
+#else
+static void drop_mm_ref(struct mm_struct *mm)
+{
+	if (current->active_mm == mm)
+		load_cr3(swapper_pg_dir);
+}
+#endif
+
+/*
+ * While a process runs, Xen pins its pagetables, which means that the
+ * hypervisor forces it to be read-only, and it controls all updates
+ * to it.  This means that all pagetable updates have to go via the
+ * hypervisor, which is moderately expensive.
+ *
+ * Since we're pulling the pagetable down, we switch to use init_mm,
+ * unpin old process pagetable and mark it all read-write, which
+ * allows further operations on it to be simple memory accesses.
+ *
+ * The only subtle point is that another CPU may be still using the
+ * pagetable because of lazy tlb flushing.  This means we need to
+ * switch all CPUs off this pagetable before we can unpin it.
+ */
+void xen_exit_mmap(struct mm_struct *mm)
+{
+	get_cpu();		/* make sure we don't move around */
+	drop_mm_ref(mm);
+	put_cpu();
 
 	xen_pgd_unpin(mm->pgd);
 }
commit	f87e4cac4f4e940b328d3deb5b53e642e3881f43	[log] [tgz]
author	Jeremy Fitzhardinge <jeremy@xensource.com>	Tue Jul 17 18:37:06 2007 -0700
committer	Jeremy Fitzhardinge <jeremy@goop.org>	Wed Jul 18 08:47:44 2007 -0700
tree	7409f86561e5f97459378abd2ae21e9a5c82bfea
parent	ab55028886dd1dd54585f22bf19a00eb23869340 [diff] [blame]