x86: make lazy %gs optional on x86_32

Impact: pt_regs changed, lazy gs handling made optional, adds slight
        overhead to SAVE_ALL, simplifies the error_code path a bit

On x86_32, %gs hasn't been used by the kernel and has been handled
lazily.  pt_regs doesn't have a slot for it and %gs is saved/loaded
only when necessary.  In preparation for stack protector support, this
patch makes lazy %gs handling optional by doing the following:

* Add CONFIG_X86_32_LAZY_GS and a slot for %gs in pt_regs.

* Save and restore %gs along with the other registers in entry_32.S
  unless LAZY_GS.  Note that this unfortunately adds a "pushl $0" to
  SAVE_ALL even when LAZY_GS.  However, it adds no overhead to the
  common exit path and simplifies the entry path with an error code.

* Define different user_gs accessors depending on LAZY_GS and add
  lazy_save_gs() and lazy_load_gs(), which are no-ops if !LAZY_GS.  The
  lazy_*_gs() ops are used to save, load and clear %gs lazily (see the
  sketch after this list).

* Define ELF_CORE_COPY_KERNEL_REGS() which always reads %gs directly.
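
For illustration, a minimal sketch of how the lazy_*_gs() ops are
expected to be used around the 32-bit context switch.  This is not part
of this patch; switch_user_gs_sketch() is a hypothetical helper, only
thread_struct.gs and the lazy_*_gs() accessors are real:

	static inline void switch_user_gs_sketch(struct thread_struct *prev,
						 struct thread_struct *next)
	{
		/* Real segment save when LAZY_GS; a no-op otherwise,
		 * since entry_32.S already saved %gs into pt_regs. */
		lazy_save_gs(prev->gs);

		/* ... the rest of the register/stack switch ... */

		/* Reload only if either task actually uses %gs. */
		if (prev->gs | next->gs)
			lazy_load_gs(next->gs);
	}

With LAZY_GS disabled, both calls compile away and the user %gs simply
travels in pt_regs like the other segment registers.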

xen and lguest changes need to be verified.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 39b0aac..83c1bc8 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -112,7 +112,7 @@
  * now struct_user_regs, they are different)
  */
 
-#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs)	\
 do {						\
 	pr_reg[0] = regs->bx;			\
 	pr_reg[1] = regs->cx;			\
@@ -124,7 +124,6 @@
 	pr_reg[7] = regs->ds & 0xffff;		\
 	pr_reg[8] = regs->es & 0xffff;		\
 	pr_reg[9] = regs->fs & 0xffff;		\
-	pr_reg[10] = get_user_gs(regs);		\
 	pr_reg[11] = regs->orig_ax;		\
 	pr_reg[12] = regs->ip;			\
 	pr_reg[13] = regs->cs & 0xffff;		\
@@ -133,6 +132,18 @@
 	pr_reg[16] = regs->ss & 0xffff;		\
 } while (0);
 
+#define ELF_CORE_COPY_REGS(pr_reg, regs)	\
+do {						\
+	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+	pr_reg[10] = get_user_gs(regs);		\
+} while (0);
+
+#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs)	\
+do {						\
+	ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
+	savesegment(gs, pr_reg[10]);		\
+} while (0);
+
 #define ELF_PLATFORM	(utsname()->machine)
 #define set_personality_64bit()	do { } while (0)
 
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 4955165..f923203 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -79,7 +79,7 @@
 #ifdef CONFIG_X86_32
 #define deactivate_mm(tsk, mm)			\
 do {						\
-	set_user_gs(task_pt_regs(tsk), 0);	\
+	lazy_load_gs(0);			\
 } while (0)
 #else
 #define deactivate_mm(tsk, mm)			\
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6d34d95..e304b66 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -28,7 +28,7 @@
 	int  xds;
 	int  xes;
 	int  xfs;
-	/* int  gs; */
+	int  xgs;
 	long orig_eax;
 	long eip;
 	int  xcs;
@@ -50,7 +50,7 @@
 	unsigned long ds;
 	unsigned long es;
 	unsigned long fs;
-	/* int  gs; */
+	unsigned long gs;
 	unsigned long orig_ax;
 	unsigned long ip;
 	unsigned long cs;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 70c74b8..79b98e5 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -186,10 +186,20 @@
  * x86_32 user gs accessors.
  */
 #ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_32_LAZY_GS
 #define get_user_gs(regs)	(u16)({unsigned long v; savesegment(gs, v); v;})
 #define set_user_gs(regs, v)	loadsegment(gs, (unsigned long)(v))
 #define task_user_gs(tsk)	((tsk)->thread.gs)
-#endif
+#define lazy_save_gs(v)		savesegment(gs, (v))
+#define lazy_load_gs(v)		loadsegment(gs, (v))
+#else	/* X86_32_LAZY_GS */
+#define get_user_gs(regs)	(u16)((regs)->gs)
+#define set_user_gs(regs, v)	do { (regs)->gs = (v); } while (0)
+#define task_user_gs(tsk)	(task_pt_regs(tsk)->gs)
+#define lazy_save_gs(v)		do { } while (0)
+#define lazy_load_gs(v)		do { } while (0)
+#endif	/* X86_32_LAZY_GS */
+#endif	/* X86_32 */
 
 static inline unsigned long get_limit(unsigned long segment)
 {