diff mbox

[4/4] x86, fpu: don't save fpu state when switching from a task

Message ID 1276441427-31514-5-git-send-email-avi@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Avi Kivity June 13, 2010, 3:03 p.m. UTC
None
diff mbox

Patch

diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d12878..4cb5bc4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -302,10 +302,12 @@  __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
 	 * chances of needing FPU soon are obviously high now
+	 *
+	 * If the fpu is remote, we can't preload it since that requires an
+	 * IPI.  Let a math exception move it locally.
 	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	__unlazy_fpu(prev_p);
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5
+		&& !fpu_remote(&next->fpu);
 
 	/* we're going to use this soon, after a few expensive things */
 	if (preload_fpu)
@@ -351,8 +353,10 @@  __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	/* If we're going to preload the fpu context, make sure clts
 	   is run while we're batching the cpu state updates. */
-	if (preload_fpu)
+	if (preload_fpu || fpu_loaded(&next->fpu))
 		clts();
+	else
+		stts();
 
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a..65d2130 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -383,8 +383,12 @@  __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
 	 * chances of needing FPU soon are obviously high now
+	 *
+	 * If the fpu is remote, we can't preload it since that requires an
+	 * IPI.  Let a math exception move it locally.
 	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5
+		&& !fpu_remote(&next->fpu);
 
 	/* we're going to use this soon, after a few expensive things */
 	if (preload_fpu)
@@ -418,12 +422,11 @@  __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	load_TLS(next, cpu);
 
-	/* Must be after DS reload */
-	unlazy_fpu(prev_p);
-
 	/* Make sure cpu is ready for new context */
-	if (preload_fpu)
+	if (preload_fpu || fpu_loaded(&next->fpu))
 		clts();
+	else
+		stts();
 
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.