x86, fpu: decouple non-lazy/eager fpu restore from xsave

Decouple the non-lazy/eager FPU restore policy from the existence of
the xsave feature. Introduce a synthetic CPUID flag to represent the
eagerfpu policy; the "eagerfpu=on" boot parameter enables it.

Requested-by: H. Peter Anvin <hpa@zytor.com>
Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
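
The synthetic bit and the parameter parsing live outside the hunks
quoted below. As a sketch of that plumbing (the word-3 bit position
and the parser body are illustrative, not quoted from these hunks):

	/* arch/x86/include/asm/cpufeature.h: Linux-defined word 3 */
	#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */

	/* arch/x86/kernel/xsave.c: "eagerfpu=on" forces the bit on */
	static int __init eager_fpu_setup(char *s)
	{
		if (!strcmp(s, "on"))
			setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
		return 1;
	}
	__setup("eagerfpu=", eager_fpu_setup);
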
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 8ca0f9f..0ca72f0 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -38,6 +38,7 @@
 
 extern unsigned int mxcsr_feature_mask;
 extern void fpu_init(void);
+extern void eager_fpu_init(void);
 
 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
 
@@ -84,6 +85,11 @@
 
 #define X87_FSW_ES (1 << 7)	/* Exception Summary */
 
+static __always_inline __pure bool use_eager_fpu(void)
+{
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
+}
+
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -99,6 +105,14 @@
         return static_cpu_has(X86_FEATURE_FXSR);
 }
 
+static inline void fx_finit(struct i387_fxsave_struct *fx)
+{
+	memset(fx, 0, xstate_size);
+	fx->cwd = 0x37f;
+	if (cpu_has_xmm)
+		fx->mxcsr = MXCSR_DEFAULT;
+}
+
 extern void __sanitize_i387_state(struct task_struct *);
 
 static inline void sanitize_i387_state(struct task_struct *tsk)
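
Note that fx_finit() clears xstate_size bytes rather than sizeof(*fx):
callers pass the fxsave image embedded at the start of a buffer that is
allocated xstate_size bytes long, so the whole save area gets scrubbed
before the init values are written. A sketch of the intended use on
fxsr-only hardware (the helper name seed_init_fpu_buf is hypothetical):

	/* sketch: without xsave, the eager restore path still needs a
	 * valid init image in init_xstate_buf; fx_finit() provides it.
	 */
	static void __init seed_init_fpu_buf(void)
	{
		if (!cpu_has_xsave)
			fx_finit(&init_xstate_buf->i387);
	}
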
@@ -291,13 +305,13 @@
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		clts();
 	__thread_set_has_fpu(tsk);
 }
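
These two helpers carry the policy's central invariant. Lazy mode
fences the FPU with CR0.TS: stts() on release, so the next FPU
instruction traps with #NM and math_state_restore() reloads the state;
clts() on acquire. Eager mode leaves TS clear permanently, since the
register contents are always kept valid across context switches.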
@@ -327,10 +341,14 @@
 
 static inline void drop_init_fpu(struct task_struct *tsk)
 {
-	if (!use_xsave())
+	if (!use_eager_fpu())
 		drop_fpu(tsk);
-	else
-		xrstor_state(init_xstate_buf, -1);
+	else {
+		if (use_xsave())
+			xrstor_state(init_xstate_buf, -1);
+		else
+			fxrstor_checking(&init_xstate_buf->i387);
+	}
 }
 
 /*
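
drop_init_fpu() above captures what "eager" means for dropping state:
lazy mode can simply drop the state and let the next #NM fault
reinitialize it, but eager mode must keep the registers valid, so it
reloads the init image immediately, via xrstor when xsave exists and
via fxrstor_checking() otherwise.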
@@ -370,7 +388,7 @@
 	 * If the task has used the math, pre-load the FPU on xsave processors
 	 * or if the past 5 consecutive context-switches used math.
 	 */
-	fpu.preload = tsk_used_math(new) && (use_xsave() ||
+	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
 					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
@@ -383,14 +401,14 @@
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else if (!use_xsave())
+		} else if (!use_eager_fpu())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (!use_xsave() && fpu_lazy_restore(new, cpu))
+			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -452,6 +470,14 @@
 	preempt_enable();
 }
 
+static inline void __save_fpu(struct task_struct *tsk)
+{
+	if (use_xsave())
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	else
+		fpu_fxsave(&tsk->thread.fpu);
+}
+
 /*
  * These disable preemption on their own and are safe
  */
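
__save_fpu() factors the eager-mode save into one helper shared by
__save_init_fpu() above and fpu_copy() below: xsave_state() on xsave
hardware, plain fpu_fxsave() otherwise. The fxsave fallback is what
lets the eager policy run on CPUs that only have fxsr.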
@@ -459,8 +485,8 @@
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 
-	if (use_xsave()) {
-		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	if (use_eager_fpu()) {
+		__save_fpu(tsk);
 		return;
 	}
 
@@ -526,11 +552,9 @@
 
 static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	if (use_xsave()) {
-		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
-
-		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
-		xsave_state(xsave, -1);
+	if (use_eager_fpu()) {
+		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
+		__save_fpu(dst);
 	} else {
 		struct fpu *dfpu = &dst->thread.fpu;
 		struct fpu *sfpu = &src->thread.fpu;
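
eager_fpu_init(), declared in the first hunk, is defined outside the
hunks shown here. A simplified sketch of the boot-time bring-up it has
to perform, assuming it mirrors math_state_restore() for the boot task
(the cpu_has_eager_fpu test and the exact sequence are illustrative):

	void eager_fpu_init(void)
	{
		clear_used_math();
		current_thread_info()->status = 0;

		if (!cpu_has_eager_fpu) {
			stts();		/* lazy mode: park the FPU behind TS */
			return;
		}

		/* eager mode: give the boot task live, init-valued state */
		init_fpu(current);
		__thread_fpu_begin(current);
		if (cpu_has_xsave)
			xrstor_state(init_xstate_buf, -1);
		else
			fxrstor_checking(&init_xstate_buf->i387);
	}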