
[1/2] x86,kvm: move qemu/guest FPU switching out to vcpu_run

Message ID 20171114215424.32214-2-riel@redhat.com (mailing list archive)
State New, archived

Commit Message

Rik van Riel Nov. 14, 2017, 9:54 p.m. UTC
From: Rik van Riel <riel@redhat.com>

Currently, every time a VCPU is scheduled out, the host kernel will
first save the guest FPU/xstate context, then load the qemu userspace
FPU context, only to then immediately save the qemu userspace FPU
context back to memory. When scheduling in a VCPU, the same extraneous
FPU loads and saves are done.

This could be avoided by moving from a model where the guest FPU is
loaded and stored with preemption disabled, to a model where the
qemu userspace FPU is swapped out for the guest FPU context for
the duration of the KVM_RUN ioctl.

This is done under the VCPU mutex, which is also taken when other
tasks inspect the VCPU FPU context, so the code should already be
safe for this change. That should come as no surprise, given that
s390 already has this optimization.

No performance changes were detected in quick ping-pong tests on
my 4 socket system, which is expected since an FPU+xstate load is
on the order of 0.1us, while ping-ponging between CPUs is on the
order of 20us, and somewhat noisy.

There may be other tests where performance changes are noticeable.

Signed-off-by: Rik van Riel <riel@redhat.com>
Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/x86/include/asm/kvm_host.h | 13 +++++++++++++
 arch/x86/kvm/x86.c              | 34 +++++++++++++---------------------
 include/linux/kvm_host.h        |  2 +-
 3 files changed, 27 insertions(+), 22 deletions(-)
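
In outline: with this patch, the userspace/guest FPU swap happens once per
KVM_RUN ioctl instead of at every preemption of the VCPU thread. A condensed
sketch of the resulting flow (function bodies abbreviated; the complete code
is in the diff at the bottom of this page):

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int r;

	/* Save qemu's userspace FPU state, load the guest FPU state
	 * (everything except PKRU) into the registers. */
	kvm_load_guest_fpu(vcpu);

	/* The VCPU thread may be preempted many times in here; the
	 * generic context-switch code saves and restores the guest
	 * FPU state through current->thread.fpu. */
	r = vcpu_run(vcpu);

	/* Save the guest FPU state back, restore qemu's. */
	kvm_put_guest_fpu(vcpu);
	return r;
}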

Comments

Wanpeng Li Nov. 15, 2017, 12:47 a.m. UTC | #1
2017-11-15 5:54 GMT+08:00  <riel@redhat.com>:
> From: Rik van Riel <riel@redhat.com>
>
> Currently, every time a VCPU is scheduled out, the host kernel will
> first save the guest FPU/xstate context, then load the qemu userspace
> FPU context, only to then immediately save the qemu userspace FPU
> context back to memory. When scheduling in a VCPU, the same extraneous
> FPU loads and saves are done.
>
> This could be avoided by moving from a model where the guest FPU is
> loaded and stored with preemption disabled, to a model where the
> qemu userspace FPU is swapped out for the guest FPU context for
> the duration of the KVM_RUN ioctl.

What will happen if CONFIG_PREEMPT is enabled?

Regards,
Wanpeng Li

>
> This is done under the VCPU mutex, which is also taken when other
> tasks inspect the VCPU FPU context, so the code should already be
> safe for this change. That should come as no surprise, given that
> s390 already has this optimization.
>
> No performance changes were detected in quick ping-pong tests on
> my 4 socket system, which is expected since an FPU+xstate load is
> on the order of 0.1us, while ping-ponging between CPUs is on the
> order of 20us, and somewhat noisy.
>
> There may be other tests where performance changes are noticeable.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 13 +++++++++++++
>  arch/x86/kvm/x86.c              | 34 +++++++++++++---------------------
>  include/linux/kvm_host.h        |  2 +-
>  3 files changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9d7d856b2d89..ffe54958491f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
>         struct kvm_mmu_memory_cache mmu_page_cache;
>         struct kvm_mmu_memory_cache mmu_page_header_cache;
>
> +       /*
> +        * QEMU userspace and the guest each have their own FPU state.
> +        * In vcpu_run, we switch between the user and guest FPU contexts.
> +        * While running a VCPU, the VCPU thread will have the guest FPU
> +        * context.
> +        *
> +        * Note that while the PKRU state lives inside the fpu registers,
> +        * it is switched out separately at VMENTER and VMEXIT time. The
> +        * "guest_fpu" state here contains the guest FPU context, with the
> +        * host PKRU bits.
> +        */
> +       struct fpu user_fpu;
>         struct fpu guest_fpu;
> +
>         u64 xcr0;
>         u64 guest_supported_xcr0;
>         u32 guest_xstate_size;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 03869eb7fcd6..aad5181ed4e9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2917,7 +2917,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>         srcu_read_unlock(&vcpu->kvm->srcu, idx);
>         pagefault_enable();
>         kvm_x86_ops->vcpu_put(vcpu);
> -       kvm_put_guest_fpu(vcpu);
>         vcpu->arch.last_host_tsc = rdtsc();
>  }
>
> @@ -5228,13 +5227,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
>
>  static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
>  {
> -       preempt_disable();
> -       kvm_load_guest_fpu(emul_to_vcpu(ctxt));
>  }
>
>  static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
>  {
> -       preempt_enable();
>  }
>
>  static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
> @@ -6908,7 +6904,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>         preempt_disable();
>
>         kvm_x86_ops->prepare_guest_switch(vcpu);
> -       kvm_load_guest_fpu(vcpu);
>
>         /*
>          * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
> @@ -7255,12 +7250,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>                 }
>         }
>
> +       kvm_load_guest_fpu(vcpu);
> +
>         if (unlikely(vcpu->arch.complete_userspace_io)) {
>                 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
>                 vcpu->arch.complete_userspace_io = NULL;
>                 r = cui(vcpu);
>                 if (r <= 0)
> -                       goto out;
> +                       goto out_fpu;
>         } else
>                 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
>
> @@ -7269,6 +7266,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>         else
>                 r = vcpu_run(vcpu);
>
> +out_fpu:
> +       kvm_put_guest_fpu(vcpu);
>  out:
>         post_kvm_run_save(vcpu);
>         if (vcpu->sigset_active)
> @@ -7663,32 +7662,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
>         vcpu->arch.cr0 |= X86_CR0_ET;
>  }
>
> +/* Swap (qemu) user FPU context for the guest FPU context. */
>  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>  {
> -       if (vcpu->guest_fpu_loaded)
> -               return;
> -
> -       /*
> -        * Restore all possible states in the guest,
> -        * and assume host would use all available bits.
> -        * Guest xcr0 would be loaded later.
> -        */
> -       vcpu->guest_fpu_loaded = 1;
> -       __kernel_fpu_begin();
> +       preempt_disable();
> +       copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
>         /* PKRU is separately restored in kvm_x86_ops->run.  */
>         __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
>                                 ~XFEATURE_MASK_PKRU);
> +       preempt_enable();
>         trace_kvm_fpu(1);
>  }
>
> +/* When vcpu_run ends, restore user space FPU context. */
>  void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
>  {
> -       if (!vcpu->guest_fpu_loaded)
> -               return;
> -
> -       vcpu->guest_fpu_loaded = 0;
> +       preempt_disable();
>         copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
> -       __kernel_fpu_end();
> +       copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
> +       preempt_enable();
>         ++vcpu->stat.fpu_reload;
>         trace_kvm_fpu(0);
>  }
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6882538eda32..354608487b8d 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -232,7 +232,7 @@ struct kvm_vcpu {
>         struct mutex mutex;
>         struct kvm_run *run;
>
> -       int guest_fpu_loaded, guest_xcr0_loaded;
> +       int guest_xcr0_loaded;
>         struct swait_queue_head wq;
>         struct pid __rcu *pid;
>         int sigset_active;
> --
> 2.9.4
>
Rik van Riel Nov. 15, 2017, 3:03 a.m. UTC | #2
On Wed, 2017-11-15 at 08:47 +0800, Wanpeng Li wrote:
> 2017-11-15 5:54 GMT+08:00  <riel@redhat.com>:
> > From: Rik van Riel <riel@redhat.com>
> > 
> > Currently, every time a VCPU is scheduled out, the host kernel will
> > first save the guest FPU/xstate context, then load the qemu
> > userspace
> > FPU context, only to then immediately save the qemu userspace FPU
> > context back to memory. When scheduling in a VCPU, the same
> > extraneous
> > FPU loads and saves are done.
> > 
> > This could be avoided by moving from a model where the guest FPU is
> > loaded and stored with preemption disabled, to a model where the
> > qemu userspace FPU is swapped out for the guest FPU context for
> > the duration of the KVM_RUN ioctl.
> 
> What will happen if CONFIG_PREEMPT is enabled?

The scheduler will save the guest FPU context when a
VCPU thread is preempted, and restore it when it is
scheduled back in.

While inside kvm_arch_vcpu_ioctl_run, the task FPU
context will be the guest FPU context, instead of the
qemu userspace FPU context.
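
To make this concrete: while the VCPU thread is inside KVM_RUN, its
current->thread.fpu holds the guest state, so ordinary context switches
preserve it. A paraphrased sketch of that generic x86 path (condensed from
arch/x86/include/asm/fpu/internal.h of that era; the lazy-FPU bookkeeping
is omitted, so this is not the verbatim kernel code):

static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
	/* Scheduling out: save the live FPU registers into the
	 * outgoing task's fpstate. For a VCPU thread inside KVM_RUN,
	 * the live registers hold the guest FPU state. */
	copy_fpregs_to_fpstate(old_fpu);
}

static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
	/* Scheduling in: reload the incoming task's saved fpstate.
	 * For a VCPU thread resuming inside KVM_RUN, this puts the
	 * guest FPU state back into the registers. */
	copy_kernel_to_fpregs(&new_fpu->state);
}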
Wanpeng Li Nov. 15, 2017, 4:33 a.m. UTC | #3
2017-11-15 11:03 GMT+08:00 Rik van Riel <riel@redhat.com>:
> On Wed, 2017-11-15 at 08:47 +0800, Wanpeng Li wrote:
>> 2017-11-15 5:54 GMT+08:00  <riel@redhat.com>:
>> > From: Rik van Riel <riel@redhat.com>
>> >
>> > Currently, every time a VCPU is scheduled out, the host kernel will
>> > first save the guest FPU/xstate context, then load the qemu
>> > userspace
>> > FPU context, only to then immediately save the qemu userspace FPU
>> > context back to memory. When scheduling in a VCPU, the same
>> > extraneous
>> > FPU loads and saves are done.
>> >
>> > This could be avoided by moving from a model where the guest FPU is
>> > loaded and stored with preemption disabled, to a model where the
>> > qemu userspace FPU is swapped out for the guest FPU context for
>> > the duration of the KVM_RUN ioctl.
>>
>> What will happen if CONFIG_PREEMPT is enabled?
>
> The scheduler will save the guest FPU context when a
> VCPU thread is preempted, and restore it when it is
> scheduled back in.

I mean all the involved processes will use the fpu. Before the patch,
if a kernel preemption occurs:

context_switch
  -> prepare_task_switch
        -> fire_sched_out_preempt_notifiers
              -> kvm_sched_out
                    -> kvm_arch_vcpu_put
                          -> kvm_put_guest_fpu
                               -> copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu)
                                    save xsave area to guest fpu buffer
                               -> __kernel_fpu_end
                                    -> copy_kernel_to_fpregs(&current->thread.fpu.state)
                                         restore prev vCPU qemu userspace FPU to the xsave area
  -> switch_to
        -> __switch_to
            -> switch_fpu_prepare
                  -> copy_fpregs_to_fpstate => save xsave area to prev vCPU qemu userspace FPU
            -> switch_fpu_finish
                  -> copy_kernel_to_fpregs => restore next task FPU to xsave area


After the patch:

context_switch
  -> prepare_task_switch
        -> fire_sched_out_preempt_notifiers
              -> kvm_sched_out

  -> switch_to
        -> __switch_to
            -> switch_fpu_prepare
                  -> copy_fpregs_to_fpstate => Oops
                     save xsave area to prev vCPU qemu userspace FPU;
                     actually the guest FPU buffer is loaded in the xsave
                     area, so you transmit the guest FPU in the xsave area
                     into the prev vCPU qemu userspace FPU

Regards,
Wanpeng Li
Quan Xu Nov. 15, 2017, 6:53 a.m. UTC | #4
On 2017/11/15 05:54, riel@redhat.com wrote:
> From: Rik van Riel <riel@redhat.com>
>
> Currently, every time a VCPU is scheduled out, the host kernel will
> first save the guest FPU/xstate context, then load the qemu userspace
> FPU context, only to then immediately save the qemu userspace FPU
> context back to memory. When scheduling in a VCPU, the same extraneous
> FPU loads and saves are done.

Rik, be careful with VM migration. With your patch, I don't think you
could load the fpu/xstate context accurately after VM migration.


Quan
Alibaba Cloud
> This could be avoided by moving from a model where the guest FPU is
> loaded and stored with preemption disabled, to a model where the
> qemu userspace FPU is swapped out for the guest FPU context for
> the duration of the KVM_RUN ioctl.
>
> This is done under the VCPU mutex, which is also taken when other
> tasks inspect the VCPU FPU context, so the code should already be
> safe for this change. That should come as no surprise, given that
> s390 already has this optimization.
>
> No performance changes were detected in quick ping-pong tests on
> my 4 socket system, which is expected since an FPU+xstate load is
> on the order of 0.1us, while ping-ponging between CPUs is on the
> order of 20us, and somewhat noisy.
>
> There may be other tests where performance changes are noticeable.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Suggested-by: Christian Borntraeger <borntraeger@de.ibm.com>
> ---
>   arch/x86/include/asm/kvm_host.h | 13 +++++++++++++
>   arch/x86/kvm/x86.c              | 34 +++++++++++++---------------------
>   include/linux/kvm_host.h        |  2 +-
>   3 files changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 9d7d856b2d89..ffe54958491f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
>   	struct kvm_mmu_memory_cache mmu_page_cache;
>   	struct kvm_mmu_memory_cache mmu_page_header_cache;
>   
> +	/*
> +	 * QEMU userspace and the guest each have their own FPU state.
> +	 * In vcpu_run, we switch between the user and guest FPU contexts.
> +	 * While running a VCPU, the VCPU thread will have the guest FPU
> +	 * context.
> +	 *
> +	 * Note that while the PKRU state lives inside the fpu registers,
> +	 * it is switched out separately at VMENTER and VMEXIT time. The
> +	 * "guest_fpu" state here contains the guest FPU context, with the
> +	 * host PKRU bits.
> +	 */
> +	struct fpu user_fpu;
>   	struct fpu guest_fpu;
> +
>   	u64 xcr0;
>   	u64 guest_supported_xcr0;
>   	u32 guest_xstate_size;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 03869eb7fcd6..aad5181ed4e9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2917,7 +2917,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   	srcu_read_unlock(&vcpu->kvm->srcu, idx);
>   	pagefault_enable();
>   	kvm_x86_ops->vcpu_put(vcpu);
> -	kvm_put_guest_fpu(vcpu);
>   	vcpu->arch.last_host_tsc = rdtsc();
>   }
>   
> @@ -5228,13 +5227,10 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
>   
>   static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
>   {
> -	preempt_disable();
> -	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
>   }
>   
>   static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
>   {
> -	preempt_enable();
>   }
>   
>   static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
> @@ -6908,7 +6904,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   	preempt_disable();
>   
>   	kvm_x86_ops->prepare_guest_switch(vcpu);
> -	kvm_load_guest_fpu(vcpu);
>   
>   	/*
>   	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
> @@ -7255,12 +7250,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>   		}
>   	}
>   
> +	kvm_load_guest_fpu(vcpu);
> +
>   	if (unlikely(vcpu->arch.complete_userspace_io)) {
>   		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
>   		vcpu->arch.complete_userspace_io = NULL;
>   		r = cui(vcpu);
>   		if (r <= 0)
> -			goto out;
> +			goto out_fpu;
>   	} else
>   		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
>   
> @@ -7269,6 +7266,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>   	else
>   		r = vcpu_run(vcpu);
>   
> +out_fpu:
> +	kvm_put_guest_fpu(vcpu);
>   out:
>   	post_kvm_run_save(vcpu);
>   	if (vcpu->sigset_active)
> @@ -7663,32 +7662,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
>   	vcpu->arch.cr0 |= X86_CR0_ET;
>   }
>   
> +/* Swap (qemu) user FPU context for the guest FPU context. */
>   void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>   {
> -	if (vcpu->guest_fpu_loaded)
> -		return;
> -
> -	/*
> -	 * Restore all possible states in the guest,
> -	 * and assume host would use all available bits.
> -	 * Guest xcr0 would be loaded later.
> -	 */
> -	vcpu->guest_fpu_loaded = 1;
> -	__kernel_fpu_begin();
> +	preempt_disable();
> +	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
>   	/* PKRU is separately restored in kvm_x86_ops->run.  */
>   	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
>   				~XFEATURE_MASK_PKRU);
> +	preempt_enable();
>   	trace_kvm_fpu(1);
>   }
>   
> +/* When vcpu_run ends, restore user space FPU context. */
>   void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
>   {
> -	if (!vcpu->guest_fpu_loaded)
> -		return;
> -
> -	vcpu->guest_fpu_loaded = 0;
> +	preempt_disable();
>   	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
> -	__kernel_fpu_end();
> +	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
> +	preempt_enable();
>   	++vcpu->stat.fpu_reload;
>   	trace_kvm_fpu(0);
>   }
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6882538eda32..354608487b8d 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -232,7 +232,7 @@ struct kvm_vcpu {
>   	struct mutex mutex;
>   	struct kvm_run *run;
>   
> -	int guest_fpu_loaded, guest_xcr0_loaded;
> +	int guest_xcr0_loaded;
>   	struct swait_queue_head wq;
>   	struct pid __rcu *pid;
>   	int sigset_active;
Rik van Riel Nov. 15, 2017, 2:40 p.m. UTC | #5
On Wed, 2017-11-15 at 12:33 +0800, Wanpeng Li wrote:
> 2017-11-15 11:03 GMT+08:00 Rik van Riel <riel@redhat.com>:
> > On Wed, 2017-11-15 at 08:47 +0800, Wanpeng Li wrote:
> > > 2017-11-15 5:54 GMT+08:00  <riel@redhat.com>:
> > > > From: Rik van Riel <riel@redhat.com>
> > > > 
> > > > Currently, every time a VCPU is scheduled out, the host kernel
> > > > will
> > > > first save the guest FPU/xstate context, then load the qemu
> > > > userspace
> > > > FPU context, only to then immediately save the qemu userspace
> > > > FPU
> > > > context back to memory. When scheduling in a VCPU, the same
> > > > extraneous
> > > > FPU loads and saves are done.
> > > > 
> > > > This could be avoided by moving from a model where the guest
> > > > FPU is
> > > > loaded and stored with preemption disabled, to a model where
> > > > the
> > > > qemu userspace FPU is swapped out for the guest FPU context for
> > > > the duration of the KVM_RUN ioctl.
> > > 
> > > What will happen if CONFIG_PREEMPT is enabled?
> > 
> > The scheduler will save the guest FPU context when a
> > VCPU thread is preempted, and restore it when it is
> > scheduled back in.
> 
> I mean all the involved processes will use the fpu. Before the patch,
> if a kernel preemption occurs:
>
> context_switch
>   -> prepare_task_switch
>         -> fire_sched_out_preempt_notifiers
>               -> kvm_sched_out
>                     -> kvm_arch_vcpu_put
>                           -> kvm_put_guest_fpu
>                                -> copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu)
>                                     save xsave area to guest fpu buffer
>                                -> __kernel_fpu_end
>                                     -> copy_kernel_to_fpregs(&current->thread.fpu.state)
>                                          restore prev vCPU qemu userspace FPU to the xsave area
>   -> switch_to
>         -> __switch_to
>             -> switch_fpu_prepare
>                   -> copy_fpregs_to_fpstate => save xsave area to prev vCPU qemu userspace FPU
>             -> switch_fpu_finish
>                   -> copy_kernel_to_fpregs => restore next task FPU to xsave area
>
>
> After the patch:
>
> context_switch
>   -> prepare_task_switch
>         -> fire_sched_out_preempt_notifiers
>               -> kvm_sched_out
>
>   -> switch_to
>         -> __switch_to
>             -> switch_fpu_prepare
>                   -> copy_fpregs_to_fpstate => Oops
>                      save xsave area to prev vCPU qemu userspace FPU;
>                      actually the guest FPU buffer is loaded in the xsave
>                      area, so you transmit the guest FPU in the xsave area
>                      into the prev vCPU qemu userspace FPU

When entering kvm_arch_vcpu_ioctl_run we save the qemu userspace
FPU context in &vcpu->arch.user_fpu, and we restore that before
leaving kvm_arch_vcpu_ioctl_run.

Userspace should always see the userspace FPU context, no?

Am I overlooking anything?
Rik van Riel Nov. 15, 2017, 2:43 p.m. UTC | #6
On Wed, 2017-11-15 at 14:53 +0800, quan.xu04@gmail.com wrote:
> 
> On 2017/11/15 05:54, riel@redhat.com wrote:
> > From: Rik van Riel <riel@redhat.com>
> > 
> > Currently, every time a VCPU is scheduled out, the host kernel will
> > first save the guest FPU/xstate context, then load the qemu
> > userspace
> > FPU context, only to then immediately save the qemu userspace FPU
> > context back to memory. When scheduling in a VCPU, the same
> > extraneous
> > FPU loads and saves are done.
> 
> Rik, be careful with VM migration. with you patch, I don't think you 
> could load fpu/xstate
>    context accurately after VM migration.

Can you explain why you believe that?

Getting the guest FPU or XSTATE is done under the vcpu->mutex.

This patch switches out guest and userspace FPU/XSTATE under the
vcpu->mutex, and switches it back before releasing the vcpu->mutex.

By the time a KVM_GET_FPU has obtained the vcpu->mutex, the guest
FPU state will be in vcpu->arch.guest_fpu.state, where you expect
it to be.

What am I missing?
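
The serialization described here comes from the generic vcpu ioctl path,
sketched below in condensed, paraphrased form (based on virt/kvm/kvm_main.c
of that era; error handling and the other ioctl cases are omitted):

static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl,
			   unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	long r;

	/* KVM_RUN, KVM_GET_FPU and KVM_GET_XSAVE all funnel through
	 * here, so none of them can run concurrently -- and with this
	 * patch, kvm_put_guest_fpu() has written the guest state back
	 * to vcpu->arch.guest_fpu before KVM_RUN drops the mutex. */
	if (mutex_lock_killable(&vcpu->mutex))
		return -EINTR;

	/* KVM_GET_FPU ends up in kvm_arch_vcpu_ioctl_get_fpu(), which
	 * copies out of vcpu->arch.guest_fpu.state. */
	r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);

	mutex_unlock(&vcpu->mutex);
	return r;
}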
Paolo Bonzini Nov. 15, 2017, 3:09 p.m. UTC | #7
On 15/11/2017 15:43, Rik van Riel wrote:
>> Rik, be careful with VM migration. with you patch, I don't think you 
>> could load fpu/xstate
>>    context accurately after VM migration.
> Can you explain why you believe that?
> 
> Getting the guest FPU or XSTATE is done under the vcpu->mutex.
> 
> This patch switches out guest and userspace FPU/XSTATE under the
> vcpu->mutex, and switches it back before releasing the vcpu->mutex.
> 
> By the time a KVM_GET_FPU has obtained the vcpu->mutex, the guest
> FPU state will be in vcpu->arch.guest_fpu.state, where you expect
> it to be.
> 
> What am I missing?

Nothing as far as I can see. :)

Paolo
Quan Xu Nov. 16, 2017, 2:50 a.m. UTC | #8
On 2017-11-15 22:43, Rik van Riel wrote:
> Can you explain why you believe that?

for example, a vcpu thread is running in kvm mode under a critical
condition to stop. QEMU sends an IPI to cause a VM-exit to happen
immediately, and this IPI doesn't make the vcpu return to QEMU. IIUC
this vcpu thread will still continue to run in kvm mode when it is
woken up on the target machine. With your patch, I don't see a chance
to load the guest FPU or XSTATE until it returns to QEMU and enters
kvm mode again.

Then the FPU or XSTATE status is inconsistent for a small window;
what's even worse is that the vcpu is running.

Did I misunderstand?

Quan
Alibaba Cloud



> Getting the guest FPU or XSTATE is done under the vcpu->mutex.
>
> This patch switches out guest and userspace FPU/XSTATE under the
> vcpu->mutex, and switches it back before releasing the vcpu->mutex.
>
> By the time a KVM_GET_FPU has obtained the vcpu->mutex, the guest
> FPU state will be in vcpu->arch.guest_fpu.state, where you expect
> it to be.
>
> What am I missing?
>
Rik van Riel Nov. 16, 2017, 4:21 a.m. UTC | #9
On Thu, 2017-11-16 at 10:50 +0800, Quan Xu wrote:
> 
> On 2017-11-15 22:43, Rik van Riel wrote:
> > Can you explain why you believe that?
> 
> for example, a vcpu thread is running in kvm mode under a critical
> condition to stop. QEMU sends an IPI to cause a VM-exit to happen
> immediately, and this IPI doesn't make the vcpu return to QEMU. IIUC
> this vcpu thread will still continue to run in kvm mode when it is
> woken up on the target machine. With your patch, I don't see a chance
> to load the guest FPU or XSTATE until it returns to QEMU and enters
> kvm mode again.
> 
> Then the FPU or XSTATE status is inconsistent for a small window;
> what's even worse is that the vcpu is running.
> 
> Did I misunderstand?

At context switch time, the context switch code will save
the guest FPU state to current->thread.fpu when the
VCPU thread is scheduled out.

When the VCPU thread is scheduled back in, the context
switch code will restore current->thread.fpu to the FPU
registers.

The VCPU thread will never run with anything else than
the guest FPU state, while inside the KVM_RUN code.
Quan Xu Nov. 16, 2017, 5:06 a.m. UTC | #10
On 2017-11-16 12:21, Rik van Riel wrote:
> On Thu, 2017-11-16 at 10:50 +0800, Quan Xu wrote:
>> On 2017-11-15 22:43, Rik van Riel wrote:
>>> Can you explain why you believe that?
>> for example, a vcpu thread is running in kvm mode under a critical
>> condition to stop. QEMU sends an IPI to cause a VM-exit to happen
>> immediately, and this IPI doesn't make the vcpu return to QEMU. IIUC
>> this vcpu thread will still continue to run in kvm mode when it is
>> woken up on the target machine. With your patch, I don't see a chance
>> to load the guest FPU or XSTATE until it returns to QEMU and enters
>> kvm mode again.
>>
>> Then the FPU or XSTATE status is inconsistent for a small window;
>> what's even worse is that the vcpu is running.
>>
>> Did I misunderstand?
> At context switch time, the context switch code will save
> the guest FPU state to current->thread.fpu when the
> VCPU thread is scheduled out.
>
> When the VCPU thread is scheduled back in, the context
> switch code will restore current->thread.fpu to the FPU
> registers.


Good catch!
Also, as your comment says, PKRU is switched out separately at
VMENTER and VMEXIT time, but behind a lot of IF conditions..

The pkru may be restored with the host pkru after VMEXIT. When the
vcpu thread is scheduled out, the pkru value in current->thread.fpu.state
may be the host pkru value, instead of the guest pkru value (of course,
this _assumes_ that the pkru is in current->thread.fpu.state as well).
In this way, the pkru may be a corner case.

VM migration again, in case,
            source_host_pkru_value != guest_pkru_value,
            target_host_pkru_value == guest_pkru_value..

the pkru status would be inconsistent..



Quan
Alibaba Cloud

> The VCPU thread will never run with anything else than
> the guest FPU state, while inside the KVM_RUN code.
>
Wanpeng Li Nov. 16, 2017, 5:54 a.m. UTC | #11
2017-11-15 22:40 GMT+08:00 Rik van Riel <riel@redhat.com>:
> On Wed, 2017-11-15 at 12:33 +0800, Wanpeng Li wrote:
>> 2017-11-15 11:03 GMT+08:00 Rik van Riel <riel@redhat.com>:
>> > On Wed, 2017-11-15 at 08:47 +0800, Wanpeng Li wrote:
>> > > 2017-11-15 5:54 GMT+08:00  <riel@redhat.com>:
>> > > > From: Rik van Riel <riel@redhat.com>
>> > > >
>> > > > Currently, every time a VCPU is scheduled out, the host kernel
>> > > > will
>> > > > first save the guest FPU/xstate context, then load the qemu
>> > > > userspace
>> > > > FPU context, only to then immediately save the qemu userspace
>> > > > FPU
>> > > > context back to memory. When scheduling in a VCPU, the same
>> > > > extraneous
>> > > > FPU loads and saves are done.
>> > > >
>> > > > This could be avoided by moving from a model where the guest
>> > > > FPU is
>> > > > loaded and stored with preemption disabled, to a model where
>> > > > the
>> > > > qemu userspace FPU is swapped out for the guest FPU context for
>> > > > the duration of the KVM_RUN ioctl.
>> > >
>> > > What will happen if CONFIG_PREEMPT is enabled?
>> >
>> > The scheduler will save the guest FPU context when a
>> > VCPU thread is preempted, and restore it when it is
>> > scheduled back in.
>>
>> I mean all the involved processes will use the fpu. Before the patch,
>> if a kernel preemption occurs:
>>
>> context_switch
>>   -> prepare_task_switch
>>         -> fire_sched_out_preempt_notifiers
>>               -> kvm_sched_out
>>                     -> kvm_arch_vcpu_put
>>                           -> kvm_put_guest_fpu
>>                                -> copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu)
>>                                     save xsave area to guest fpu buffer
>>                                -> __kernel_fpu_end
>>                                     -> copy_kernel_to_fpregs(&current->thread.fpu.state)
>>                                          restore prev vCPU qemu userspace FPU to the xsave area
>>   -> switch_to
>>         -> __switch_to
>>             -> switch_fpu_prepare
>>                   -> copy_fpregs_to_fpstate => save xsave area to prev vCPU qemu userspace FPU
>>             -> switch_fpu_finish
>>                   -> copy_kernel_to_fpregs => restore next task FPU to xsave area
>>
>>
>> After the patch:
>>
>> context_switch
>>   -> prepare_task_switch
>>         -> fire_sched_out_preempt_notifiers
>>               -> kvm_sched_out
>>
>>   -> switch_to
>>         -> __switch_to
>>             -> switch_fpu_prepare
>>                   -> copy_fpregs_to_fpstate => Oops
>>                      save xsave area to prev vCPU qemu userspace FPU;
>>                      actually the guest FPU buffer is loaded in the xsave
>>                      area, so you transmit the guest FPU in the xsave area
>>                      into the prev vCPU qemu userspace FPU
>
> When entering kvm_arch_vcpu_ioctl_run we save the qemu userspace
> FPU context in &vcpu->arch.user_fpu, and we restore that before
> leaving kvm_arch_vcpu_ioctl_run.
>
> Userspace should always see the userspace FPU context, no?

You are right.

Regards,
Wanpeng Li
Paolo Bonzini Nov. 16, 2017, 10:21 a.m. UTC | #12
On 16/11/2017 06:06, Quan Xu wrote:
> when the vcpu thread is scheduled out, the pkru value in
> current->thread.fpu.state may be the host pkru value, instead of the
> guest pkru value (of course, this _assumes_ that the pkru is in
> current->thread.fpu.state as well). In this way, the pkru may be a
> corner case.

Rik may correct me, but I think this is not possible.  Preemption is
disabled all the time while PKRU = guest_pkru (which is only during
vmx_vcpu_run).

Context switching will only happen in vcpu_enter_guest() after
preempt_enable() for a preemptible kernel, or in vcpu_run via
cond_resched() for a non-preemptible kernel.

Thanks,

Paolo

> 
> VM migration again, in case,
>            source_host_pkru_value != guest_pkru_value,
>            target_host_pkru_value == guest_pkru_value..
> 
> the pkru status would be inconsistent..
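
A paraphrased sketch of the PKRU handling Paolo refers to, condensed from
the vmx_vcpu_run() of that era (arch/x86/kvm/vmx.c); the whole function
runs with preemption disabled, so the context-switch code can never observe
the guest PKRU in the registers:

static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	/* Switch to the guest PKRU just before entering the guest. */
	if (static_cpu_has(X86_FEATURE_PKU) &&
	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
	    vcpu->arch.pkru != vmx->host_pkru)
		__write_pkru(vcpu->arch.pkru);

	/* ... VMLAUNCH/VMRESUME, then VM-exit ... */

	/* Stash the guest PKRU in vcpu->arch.pkru (not in guest_fpu)
	 * and switch back to the host PKRU before preemption can be
	 * re-enabled. */
	if (static_cpu_has(X86_FEATURE_PKU) &&
	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
		vcpu->arch.pkru = __read_pkru();
		if (vcpu->arch.pkru != vmx->host_pkru)
			__write_pkru(vmx->host_pkru);
	}
}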
Quan Xu Nov. 16, 2017, 12:12 p.m. UTC | #13
On 2017-11-16 18:21, Paolo Bonzini wrote:
> On 16/11/2017 06:06, Quan Xu wrote:
>> when vcpu thread is scheduled out, the pkru value in
>> current->thread.fpu.state may be the host pkru value, instead of
>> guest pkru value (of course, this _assumes_ that the pkru is in
>> current->thread.fpu.state as well). in this way, the pkru may be a
>> coner case.
> Rik may correct me, but I think this is not possible.  Preemption is
> disabled all the time while PKRU = guest_pkru (which is only during
> vmx_vcpu_run).

refer to the following code,

vcpu_enter_guest()
{
     ...
     preempt_disable()
        ...
        kvm_x86_ops->run(vcpu);  (actually it is vmx_vcpu_run())
        ...
     ...
     preempt_enable();
}



We preempt_disable() before running kvm_x86_ops->run..
In kvm_x86_ops->run, the pkru is restored with host_pkru (IF
guest_pkru != host_pkru). All of this happens under preempt_disable().

It's not true that preemption is disabled all the time while
PKRU = guest_pkru..



However, it seems there is still a gap..

As Rik said, "at context switch time, the context switch code will save
the guest FPU state to current->thread.fpu when the VCPU thread is
scheduled out."

After preempt_enable() in vcpu_enter_guest(), the vcpu thread is
scheduled out; in kvm_x86_ops->run, the PKRU has already been restored
with host_pkru (IF guest_pkru != host_pkru).. instead of guest_pkru..

Then the PKRU is host_pkru, so how can guest_pkru be saved to
current->thread.fpu?

As mentioned, all of this _assumes_ that the pkru is in
current->thread.fpu.state as well.


thanks,

Quan
Alibaba Cloud

> Context switching will only happen in vcpu_enter_guest() after
> preempt_enable() for a preemptible kernel, or in vcpu_run via
> cond_resched() for a non-preemptible kernel.
>
> Thanks,
>
> Paolo
>
>> VM migration again, in case,
>>             source_host_pkru_value != guest_pkru_value,
>>             target_host_pkru_value == guest_pkru_value..
>>
>> the pkru status would be inconsistent..
>
Paolo Bonzini Nov. 16, 2017, 12:18 p.m. UTC | #14
On 16/11/2017 13:12, Quan Xu wrote:
> However, it seems there is still a gap..
> 
> As Rik said, "at context switch time, the context switch code will save
> the guest FPU state to current->thread.fpu when the VCPU thread is scheduled out."

By "guest FPU state" Rik means "guest FPU with host PKRU".

Guest PKRU is always stored in vcpu->arch.pkru rather than in the guest
FPU state, so guest PKRU will never be in current->thread.fpu.state either.

KVM_GET_XSAVE will merge the guest FPU state with vcpu->arch.pkru, and
migration will work properly.

Thanks,

Paolo

> After preempt_enable() in vcpu_enter_guest(), the vcpu thread is
> scheduled out; in kvm_x86_ops->run, the PKRU has already been restored
> with host_pkru (IF guest_pkru != host_pkru).. instead of guest_pkru..
> 
> Then the PKRU is host_pkru, so how can guest_pkru be saved to
> current->thread.fpu?
> 
> As mentioned, all of this _assumes_ that the pkru is in
> current->thread.fpu.state as well.
> 
> 
> thanks,
> 
> Quan
> Alibaba Cloud
> 
>> Context switching will only happen in vcpu_enter_guest() after
>> preempt_enable() for a preemptible kernel, or in vcpu_run via
>> cond_resched() for a non-preemptible kernel.
>>
>> Thanks,
>>
>> Paolo
>>
>>> VM migration again, in case,
>>>             source_host_pkru_value != guest_pkru_value,
>>>             target_host_pkru_value == guest_pkru_value..
>>>
>>> the pkru status would be inconsistent..
>>
>
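
To illustrate the KVM_GET_XSAVE point: a condensed, paraphrased sketch of
fill_xsave() from the arch/x86/kvm/x86.c of that era (legacy-region copying
and compacted-format details omitted). The PKRU slot of the exported xsave
image is filled from vcpu->arch.pkru rather than from guest_fpu, so
migration carries the guest PKRU value:

static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
{
	struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
	u64 valid = xsave->header.xfeatures & ~XFEATURE_MASK_FPSSE;

	while (valid) {
		u64 feature = valid & -valid;
		int index = fls64(feature) - 1;
		u32 size, offset, ecx, edx;

		cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx);

		if (feature == XFEATURE_MASK_PKRU)
			/* PKRU comes from vcpu->arch.pkru... */
			memcpy(dest + offset, &vcpu->arch.pkru,
			       sizeof(vcpu->arch.pkru));
		else
			/* ...everything else from the guest xsave area. */
			memcpy(dest + offset,
			       get_xsave_addr(xsave, feature), size);

		valid -= feature;
	}
}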
Quan Xu Nov. 16, 2017, 1:35 p.m. UTC | #15
On 2017-11-16 20:18, Paolo Bonzini wrote:
> On 16/11/2017 13:12, Quan Xu wrote:
>> However, it seems there is still a gap..
>>
>> As Rik said, "at context switch time, the context switch code will save
>> the guest FPU state to current->thread.fpu when the VCPU thread is scheduled out."
> By "guest FPU state" Rik means "guest FPU with host PKRU".
   :( Actually it is host_pkru, just under a different name..

> Guest PKRU is always stored in vcpu->arch.pkru rather than in the guest
> FPU state, so guest PKRU will never be in current->thread.fpu.state either.
>
> KVM_GET_XSAVE will the guest FPU state with vcpu->arch.pkru and
> migration will work properly.
   Agreed, I'll fix that.. that's why I'm concerned: there are so many
   ways to write PKRU with host_pkru/guest_pkru..

  After migration, KVM_GET|SET_XSAVE restores the right pkru.. but we
  introduce another method:

       -- When the VCPU thread is scheduled back in, the context
          switch code will restore current->thread.fpu to the FPU
          registers.


there is still a window in which current->thread.fpu is restored to the
FPU registers, before entering guest mode and preempt_disable().

on the target machine, after migration, the pkru value in
current->thread.fpu is source_host_pkru.

in case,

             source_host_pkru_value != guest_pkru_value,
             target_host_pkru_value == guest_pkru_value..

source_host_pkru_value may be restored to PKRU.. making the pkru status inconsistent..


thanks

Quan
Alibaba Cloud
> Thanks,
>
> Paolo
>
>> after preempt_enable() in vcpu_enter_guest(), the vcpu thread is
>> scheduled out,
>> in kvm_x86_ops->run, the PKRU has been restored with host_pkru (IF
>> guest_pkru != host_pkru)..
>> instead of guest_pkru..
>>
>> then the PKRU is host_pkru, how to save guest_pkru current->thread.fpu?
>>
>> as mentioned, all this _assumes_ that the pkru is in
>> current->thread.fpu.state as well.
>>
>>
>> thanks,
>>
>> Quan
>> Alibaba Cloud
>>
>>> Context switching will only happen in vcpu_enter_guest() after
>>> preempt_enable() for a preemptible kernel, or in vcpu_run via
>>> cond_resched() for a non-preemptible kernel.
>>>
>>> Thanks,
>>>
>>> Paolo
>>>
>>>> VM migration again, in case,
>>>>              source_host_pkru_value != guest_pkru_value,
>>>>              target_host_pkru_value == guest_pkru_value..
>>>>
>>>> the pkru status would be inconsistent..
>
Paolo Bonzini Nov. 16, 2017, 1:39 p.m. UTC | #16
On 16/11/2017 14:35, Quan Xu wrote:
> but we introduce another method:
> 
>       -- When the VCPU thread is scheduled back in, the context
>          switch code will restore current->thread.fpu to the FPU
>          registers.
> 
> 
> there is still a window in which current->thread.fpu is restored to
> the FPU registers, before entering guest mode and preempt_disable().

That will always use the host PKRU.  The guest PKRU is _never_ visible
to the context switch code, because it's only ever used in a section
that runs with preemption disabled.

It's actually much simpler than before.

Paolo

> on the target machine, after migration, the pkru value in
> current->thread.fpu is source_host_pkru.
> 
> in case,
> 
>             source_host_pkru_value != guest_pkru_value,
>             target_host_pkru_value == guest_pkru_value..
> 
> source_host_pkru_value may be restored to PKRU.. making the pkru
> status inconsistent..
Paolo Bonzini Nov. 16, 2017, 5:50 p.m. UTC | #17
On 16/11/2017 15:28, Quan Xu wrote:
>  	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
>  
> + kvm_load_guest_fpu(vcpu);
> +
>  	for (;;) {
>  		if (kvm_vcpu_running(vcpu)) {
>  			r = vcpu_enter_guest(vcpu);
> 
> <<
> 
> 
> 
>   As Rik dropped kvm_load_guest_fpu(vcpu) in vcpu_enter_guest()..
>   in case we are still in kvm mode, how do we make sure the pkru is
> always the right one before entering guest mode, and not preempted
> before preempt_disable(), after migration? :(

As you know:

1) kvm_load_guest_fpu doesn't load the guest PKRU, it keeps the
userspace PKRU.

2) the guest PKRU is only ever set in a preemption-disabled area

Thus, context switch always sees the userspace PKRU.  The guest PKRU is
only set the next time vmx_vcpu_run executes.

Paolo
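
Both facts are visible in the code itself; annotated here for emphasis
(the first line is taken from the diff above, the second from the
vmx_vcpu_run() sketch earlier in the thread; the comments are explanatory):

/* Fact 1: kvm_load_guest_fpu() restores everything EXCEPT PKRU, so
 * the registers keep the userspace PKRU value after it returns. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU);

/* Fact 2: the guest PKRU is written only inside vmx_vcpu_run(),
 * which executes entirely with preemption disabled. */
__write_pkru(vcpu->arch.pkru);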
Quan Xu Nov. 17, 2017, 2:54 a.m. UTC | #18
On 2017-11-17 01:50, Paolo Bonzini wrote:
> On 16/11/2017 15:28, Quan Xu wrote:
>>   	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
>>   
>> + kvm_load_guest_fpu(vcpu);
>> +
>>   	for (;;) {
>>   		if (kvm_vcpu_running(vcpu)) {
>>   			r = vcpu_enter_guest(vcpu);
>>
>> <<
>>
>>
>>
>>    As Rik dropped kvm_load_guest_fpu(vcpu) in vcpu_enter_guest()..
>>    in case we are still in kvm mode, how do we make sure the pkru is
>> always the right one before entering guest mode, and not preempted
>> before preempt_disable(), after migration? :(
> As you know:
>
> 1) kvm_load_guest_fpu doesn't load the guest PKRU, it keeps the
> userspace PKRU.
>
> 2) the guest PKRU is only ever set in a preemption-disabled area
>
> Thus, context switch always sees the userspace PKRU.  The guest PKRU is
> only set the next time vmx_vcpu_run executes.
>
> Paolo
>

Paolo, thanks for your explanation!! :-)
Rik, could you cc me in v2?

Quan

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9d7d856b2d89..ffe54958491f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@  struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
+	/*
+	 * QEMU userspace and the guest each have their own FPU state.
+	 * In vcpu_run, we switch between the user and guest FPU contexts.
+	 * While running a VCPU, the VCPU thread will have the guest FPU
+	 * context.
+	 *
+	 * Note that while the PKRU state lives inside the fpu registers,
+	 * it is switched out separately at VMENTER and VMEXIT time. The
+	 * "guest_fpu" state here contains the guest FPU context, with the
+	 * host PKRU bits.
+	 */
+	struct fpu user_fpu;
 	struct fpu guest_fpu;
+
 	u64 xcr0;
 	u64 guest_supported_xcr0;
 	u32 guest_xstate_size;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03869eb7fcd6..aad5181ed4e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2917,7 +2917,6 @@  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	pagefault_enable();
 	kvm_x86_ops->vcpu_put(vcpu);
-	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
 }
 
@@ -5228,13 +5227,10 @@  static void emulator_halt(struct x86_emulate_ctxt *ctxt)
 
 static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
 {
-	preempt_disable();
-	kvm_load_guest_fpu(emul_to_vcpu(ctxt));
 }
 
 static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
 {
-	preempt_enable();
 }
 
 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
@@ -6908,7 +6904,6 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	preempt_disable();
 
 	kvm_x86_ops->prepare_guest_switch(vcpu);
-	kvm_load_guest_fpu(vcpu);
 
 	/*
 	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@ -7255,12 +7250,14 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
+	kvm_load_guest_fpu(vcpu);
+
 	if (unlikely(vcpu->arch.complete_userspace_io)) {
 		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
 		vcpu->arch.complete_userspace_io = NULL;
 		r = cui(vcpu);
 		if (r <= 0)
-			goto out;
+			goto out_fpu;
 	} else
 		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
@@ -7269,6 +7266,8 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	else
 		r = vcpu_run(vcpu);
 
+out_fpu:
+	kvm_put_guest_fpu(vcpu);
 out:
 	post_kvm_run_save(vcpu);
 	if (vcpu->sigset_active)
@@ -7663,32 +7662,25 @@  static void fx_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
+/* Swap (qemu) user FPU context for the guest FPU context. */
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->guest_fpu_loaded)
-		return;
-
-	/*
-	 * Restore all possible states in the guest,
-	 * and assume host would use all available bits.
-	 * Guest xcr0 would be loaded later.
-	 */
-	vcpu->guest_fpu_loaded = 1;
-	__kernel_fpu_begin();
+	preempt_disable();
+	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
 	/* PKRU is separately restored in kvm_x86_ops->run.  */
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
 				~XFEATURE_MASK_PKRU);
+	preempt_enable();
 	trace_kvm_fpu(1);
 }
 
+/* When vcpu_run ends, restore user space FPU context. */
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->guest_fpu_loaded)
-		return;
-
-	vcpu->guest_fpu_loaded = 0;
+	preempt_disable();
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
-	__kernel_fpu_end();
+	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+	preempt_enable();
 	++vcpu->stat.fpu_reload;
 	trace_kvm_fpu(0);
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6882538eda32..354608487b8d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -232,7 +232,7 @@  struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	int guest_fpu_loaded, guest_xcr0_loaded;
+	int guest_xcr0_loaded;
 	struct swait_queue_head wq;
 	struct pid __rcu *pid;
 	int sigset_active;