x86, fpu: lazy allocation of FPU area - v5

Only allocate the FPU area when the application actually uses FPU, i.e., in the
first lazy FPU trap. This could save memory for non-fpu using apps.

for example: on my system after boot, there are around 300 processes, with
only 17 using FPU.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 382a5fa..4be7b58 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -21,7 +21,7 @@
 
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
-extern void init_fpu(struct task_struct *child);
+extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
 extern void init_thread_xstate(void);
 
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 99d2978..e6bf92d 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -366,6 +366,8 @@
 
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int xstate_size;
+extern void free_thread_xstate(struct task_struct *);
+extern struct kmem_cache *task_xstate_cachep;
 extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned short num_cache_leaves;