x86, fpu: split FPU state from task struct - v5

Split the FPU save area from the task struct. This makes migrating FPU
context easy, and it is generally cleaner (a sketch of the new layout
follows the list below). It also enables the following two optimizations:

1) Allocate the save area only when the application actually uses the FPU,
i.e. on the first lazy FPU trap. This can save memory for apps that never
touch the FPU. The next patch implements this lazy allocation; a rough
sketch appears after this list.

2) Allocate only as much space as the running CPU actually needs, rather
than always 512 bytes. Patches enabling xsave/xrstor support (coming
shortly) will take advantage of this.
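
As context for the diff below, here is a minimal sketch of the
data-structure change. The thread_xstate/xstate names match the
identifiers visible in the diff; the exact member list and the
surrounding thread_struct fields are simplified assumptions, not the
full patch:

	/*
	 * Before: the 512-byte save area was embedded in every task:
	 *
	 *	struct thread_struct {
	 *		...
	 *		union i387_union	i387;
	 *	};
	 *
	 * After: thread_struct carries only a pointer, and the save area
	 * is a separately allocated object whose size init_thread_xstate()
	 * can choose for the running CPU (software-emulation state omitted
	 * here for brevity):
	 */
	union thread_xstate {
		struct i387_fsave_struct	fsave;
		struct i387_fxsave_struct	fxsave;
	};

	struct thread_struct {
		/* ... */
		union thread_xstate	*xstate;
	};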
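
Optimization 1 would then look roughly like the following in the lazy FPU
trap path. This is a hypothetical sketch, not the next patch itself:
task_xstate_cachep is an illustrative name, and error handling is reduced
to the bare minimum:

	asmlinkage void math_state_restore(void)
	{
		struct task_struct *tsk = current;

		if (!tsk->thread.xstate) {
			/*
			 * First FPU use by this task: allocate the save
			 * area now, from a cache sized for the running
			 * CPU by init_thread_xstate() rather than fixed
			 * at 512 bytes.
			 */
			tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
							      GFP_KERNEL);
			if (!tsk->thread.xstate) {
				force_sig(SIGKILL, tsk);
				return;
			}
			init_fpu(tsk);
		}
		clts();				/* allow FPU use again */
		restore_fpu(tsk);		/* reload from thread.xstate */
		task_thread_info(tsk)->status |= TS_USEDFPU;
	}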

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 54522b8..382a5fa 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -23,6 +23,7 @@
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void init_thread_xstate(void);
 
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
@@ -117,24 +118,22 @@
 	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
 	   starting with gas 2.16. */
 	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #elif 0
 	/* Using, as a workaround, the properly prefixed form below isn't
 	   accepted by any binutils version so far released, complaining that
 	   the same type of prefix is used twice if an extended register is
 	   needed for addressing (fix submitted to mainline 2005-11-21). */
 	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (tsk->thread.i387.fxsave));
+			     : "=m" (tsk->thread.xstate->fxsave));
 #else
 	/* This, however, we can work around by forcing the compiler to select
 	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave %P2(%1)"
-			     : "=m" (tsk->thread.i387.fxsave)
-			     : "cdaSDb" (tsk),
-				"i" (offsetof(__typeof__(*tsk),
-					      thread.i387.fxsave)));
+	__asm__ __volatile__("rex64/fxsave (%1)"
+			     : "=m" (tsk->thread.xstate->fxsave)
+			     : "cdaSDb" (&tsk->thread.xstate->fxsave));
 #endif
-	clear_fpu_state(&tsk->thread.i387.fxsave);
+	clear_fpu_state(&tsk->thread.xstate->fxsave);
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
@@ -148,7 +147,7 @@
 	int err = 0;
 
 	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.i387.fxsave));
+			sizeof(tsk->thread.xstate->fxsave));
 
 	if ((unsigned long)buf % 16)
 		printk("save_i387: bad fpstate %p\n", buf);
@@ -164,7 +163,7 @@
 		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	} else {
-		if (__copy_to_user(buf, &tsk->thread.i387.fxsave,
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
 				   sizeof(struct i387_fxsave_struct)))
 			return -1;
 	}
@@ -201,7 +200,7 @@
 		"nop ; frstor %1",
 		"fxrstor %1",
 		X86_FEATURE_FXSR,
-		"m" ((tsk)->thread.i387.fxsave));
+		"m" (tsk->thread.xstate->fxsave));
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -225,8 +224,8 @@
 		"fxsave %[fx]\n"
 		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
 		X86_FEATURE_FXSR,
-		[fx] "m" (tsk->thread.i387.fxsave),
-		[fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory");
+		[fx] "m" (tsk->thread.xstate->fxsave),
+		[fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
@@ -327,25 +326,25 @@
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.cwd;
+		return tsk->thread.xstate->fxsave.cwd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.cwd;
+		return (unsigned short)tsk->thread.xstate->fsave.cwd;
 	}
 }
 
 static inline unsigned short get_fpu_swd(struct task_struct *tsk)
 {
 	if (cpu_has_fxsr) {
-		return tsk->thread.i387.fxsave.swd;
+		return tsk->thread.xstate->fxsave.swd;
 	} else {
-		return (unsigned short)tsk->thread.i387.fsave.swd;
+		return (unsigned short)tsk->thread.xstate->fsave.swd;
 	}
 }
 
 static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
 {
 	if (cpu_has_xmm) {
-		return tsk->thread.i387.fxsave.mxcsr;
+		return tsk->thread.xstate->fxsave.mxcsr;
 	} else {
 		return MXCSR_DEFAULT;
 	}