@@ -302,10 +302,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
+ *
+ * If the fpu is remote, we can't preload it since that requires an
+ * IPI. Let a math exception move it locally.
*/
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
- __unlazy_fpu(prev_p);
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5
+ && !fpu_remote(&next->fpu);

/* we're going to use this soon, after a few expensive things */
if (preload_fpu)
@@ -351,8 +353,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* If we're going to preload the fpu context, make sure clts
is run while we're batching the cpu state updates. */
- if (preload_fpu)
+ if (preload_fpu || fpu_loaded(&next->fpu))
clts();
+ else
+ stts();

/*
* Leave lazy mode, flushing any hypercalls made here.
@@ -383,8 +383,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
+ *
+ * If the fpu is remote, we can't preload it since that requires an
+ * IPI. Let a math exception move it locally.
*/
- preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5
+ && !fpu_remote(&next->fpu);

/* we're going to use this soon, after a few expensive things */
if (preload_fpu)
@@ -418,12 +422,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
load_TLS(next, cpu);
- /* Must be after DS reload */
- unlazy_fpu(prev_p);
-
/* Make sure cpu is ready for new context */
- if (preload_fpu)
+ if (preload_fpu || fpu_loaded(&next->fpu))
clts();
+ else
+ stts();

/*
* Leave lazy mode, flushing any hypercalls made here.
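
The fpu_remote() and fpu_loaded() predicates used above are introduced elsewhere in the series and are not defined in this excerpt. As a rough sketch of the idea only (the field and helper names below are assumptions, not the patch's actual definitions): the fpu state remembers which CPU's registers currently hold it, so the context switch can tell "loaded here", "remote", and "only in memory" apart.

#include <linux/smp.h>
#include <linux/types.h>

/* Hypothetical layout; the real struct fpu in the series may differ. */
struct fpu_sketch {
	int last_cpu;	/* CPU whose registers hold this state, or -1 if none */
	/* ... save area, status flags ... */
};

/* State is live in some CPU's registers rather than only in memory. */
static inline bool fpu_cached(const struct fpu_sketch *fpu)
{
	return fpu->last_cpu != -1;
}

/* Live in *this* CPU's registers: clts() is enough, no restore needed. */
static inline bool fpu_loaded(const struct fpu_sketch *fpu)
{
	return fpu->last_cpu == smp_processor_id();
}

/*
 * Live in another CPU's registers: flushing it from here would take an
 * IPI, so __switch_to() skips the preload and lets the #NM fault
 * (math_state_restore) pull the state over later.
 */
static inline bool fpu_remote(const struct fpu_sketch *fpu)
{
	return fpu_cached(fpu) && !fpu_loaded(fpu);
}

With predicates like these, the clts()/stts() changes keep CR0.TS consistent with where the state actually is: TS is cleared when the incoming task's registers are already on this CPU (or about to be preloaded), and set otherwise so the first FPU instruction traps and triggers the lazy restore.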