[PATCH] i386: Convert i386 PDA code to use %fs

Convert the PDA code to use %fs rather than %gs as the segment for
per-processor data.  This is because some processors show a small but
measurable performance gain for reloading a NULL segment selector (as %fs
generally is in user-space) versus a non-NULL one (as %gs generally is).

On modern processors the difference is very small, perhaps undetectable.
Some old AMD "K6 3D+" processors are noticably slower when %fs is used
rather than %gs; I have no idea why this might be, but I think they're
sufficiently rare that it doesn't matter much.

This patch also fixes the math emulator, which had not been adjusted to
match the changed struct pt_regs.

[frederik.deweerdt@gmail.com: fixit with gdb]
[mingo@elte.hu: Fix KVM too]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Ian Campbell <Ian.Campbell@XenSource.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Zachary Amsden <zach@vmware.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 369035d..8d33c9b 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -90,8 +90,8 @@
 	pr_reg[6] = regs->eax;				\
 	pr_reg[7] = regs->xds;				\
 	pr_reg[8] = regs->xes;				\
-	savesegment(fs,pr_reg[9]);			\
-	pr_reg[10] = regs->xgs;				\
+	pr_reg[9] = regs->xfs;				\
+	savesegment(gs,pr_reg[10]);			\
 	pr_reg[11] = regs->orig_eax;			\
 	pr_reg[12] = regs->eip;				\
 	pr_reg[13] = regs->xcs;				\
diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h
index 68ff102..e6aa30f 100644
--- a/include/asm-i386/mmu_context.h
+++ b/include/asm-i386/mmu_context.h
@@ -63,7 +63,7 @@
 }
 
 #define deactivate_mm(tsk, mm)			\
-	asm("movl %0,%%fs": :"r" (0));
+	asm("movl %0,%%gs": :"r" (0));
 
 #define activate_mm(prev, next) \
 	switch_mm((prev),(next),NULL)
diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h
index 2ba2736..b12d59a 100644
--- a/include/asm-i386/pda.h
+++ b/include/asm-i386/pda.h
@@ -39,19 +39,19 @@
 		if (0) { T__ tmp__; tmp__ = (val); }			\
 		switch (sizeof(_proxy_pda.field)) {			\
 		case 1:							\
-			asm(op "b %1,%%gs:%c2"				\
+			asm(op "b %1,%%fs:%c2"				\
 			    : "+m" (_proxy_pda.field)			\
 			    :"ri" ((T__)val),				\
 			     "i"(pda_offset(field)));			\
 			break;						\
 		case 2:							\
-			asm(op "w %1,%%gs:%c2"				\
+			asm(op "w %1,%%fs:%c2"				\
 			    : "+m" (_proxy_pda.field)			\
 			    :"ri" ((T__)val),				\
 			     "i"(pda_offset(field)));			\
 			break;						\
 		case 4:							\
-			asm(op "l %1,%%gs:%c2"				\
+			asm(op "l %1,%%fs:%c2"				\
 			    : "+m" (_proxy_pda.field)			\
 			    :"ri" ((T__)val),				\
 			     "i"(pda_offset(field)));			\
@@ -65,19 +65,19 @@
 		typeof(_proxy_pda.field) ret__;				\
 		switch (sizeof(_proxy_pda.field)) {			\
 		case 1:							\
-			asm(op "b %%gs:%c1,%0"				\
+			asm(op "b %%fs:%c1,%0"				\
 			    : "=r" (ret__)				\
 			    : "i" (pda_offset(field)),			\
 			      "m" (_proxy_pda.field));			\
 			break;						\
 		case 2:							\
-			asm(op "w %%gs:%c1,%0"				\
+			asm(op "w %%fs:%c1,%0"				\
 			    : "=r" (ret__)				\
 			    : "i" (pda_offset(field)),			\
 			      "m" (_proxy_pda.field));			\
 			break;						\
 		case 4:							\
-			asm(op "l %%gs:%c1,%0"				\
+			asm(op "l %%fs:%c1,%0"				\
 			    : "=r" (ret__)				\
 			    : "i" (pda_offset(field)),			\
 			      "m" (_proxy_pda.field));			\
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 359f10b..11bf899 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -424,7 +424,7 @@
 	.vm86_info = NULL,						\
 	.sysenter_cs = __KERNEL_CS,					\
 	.io_bitmap_ptr = NULL,						\
-	.gs = __KERNEL_PDA,						\
+	.fs = __KERNEL_PDA,						\
 }
 
 /*
@@ -442,8 +442,8 @@
 }
 
 #define start_thread(regs, new_eip, new_esp) do {		\
-	__asm__("movl %0,%%fs": :"r" (0));			\
-	regs->xgs = 0;						\
+	__asm__("movl %0,%%gs": :"r" (0));			\
+	regs->xfs = 0;						\
 	set_fs(USER_DS);					\
 	regs->xds = __USER_DS;					\
 	regs->xes = __USER_DS;					\
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index bdbc894..1646996 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -16,8 +16,8 @@
 	long eax;
 	int  xds;
 	int  xes;
-	/* int  xfs; */
-	int  xgs;
+	int  xfs;
+	/* int  xgs; */
 	long orig_eax;
 	long eip;
 	int  xcs;