x86, fpu: split FPU state from task struct - v5

Split the FPU save area from the task struct. This allows easy migration
of FPU context, and it's generally cleaner. It also allows the following
two optimizations:

1) only allocate when the application actually uses FPU, so in the first
lazy FPU trap. This could save memory for non-fpu using apps. Next patch
does this lazy allocation.

2) allocate the right size for the actual cpu rather than 512 bytes always.
Patches enabling xsave/xrstor support (coming shortly) will take advantage
of this.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index eaf4548..99d2978 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -354,7 +354,7 @@
 	u32			entry_eip;
 };
 
-union i387_union {
+union thread_xstate {
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
 	struct i387_soft_struct		soft;
@@ -365,6 +365,7 @@
 #endif
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
+extern unsigned int xstate_size;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;
@@ -397,8 +398,8 @@
 	unsigned long		cr2;
 	unsigned long		trap_no;
 	unsigned long		error_code;
-	/* Floating point info: */
-	union i387_union	i387 __attribute__((aligned(16)));;
+	/* floating point and extended processor state */
+	union thread_xstate	*xstate;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
 	struct vm86_struct __user *vm86_info;