diff mbox series

KVM: arm64: Don't save FP traps in default cptr_el2 value

Message ID 20241112105032.793274-1-james.clark@linaro.org (mailing list archive)
State New
Headers show
Series KVM: arm64: Don't save FP traps in default cptr_el2 value | expand

Commit Message

James Clark Nov. 12, 2024, 10:50 a.m. UTC
kvm_get_reset_cptr_el2() is called at vcpu init before the vcpu is
loaded. Since the linked commit, the fp state was moved from the vcpu to
host data but it shouldn't be accessed at this point.

Move the bits that require guest_owns_fp_regs() out of the default value
and into just before they're used in activate and deactivate traps. This
fixes the following bug when nvhe && vcpu_has_sve() == true:

 BUG: using smp_processor_id() in preemptible [00000000] code: lkvm/118
 caller is debug_smp_processor_id+0x20/0x30
 CPU: 0 UID: 0 PID: 118 Comm: lkvm Not tainted 6.12.0-rc1+ #35
 Hardware name: FVP Base RevC (DT)
 Call trace:
  dump_backtrace+0xfc/0x120
  show_stack+0x24/0x38
  dump_stack_lvl+0x3c/0x98
  dump_stack+0x18/0x28
  check_preemption_disabled+0xe0/0xe8
  debug_smp_processor_id+0x20/0x30
  guest_owns_fp_regs+0x1c/0xb0
  kvm_arch_vcpu_ioctl+0xcfc/0xe10
  kvm_vcpu_ioctl+0x6c4/0x8a0
  __arm64_sys_ioctl+0x9c/0xe0
  invoke_syscall+0x4c/0x110
  el0_svc_common+0xb8/0xf0
  do_el0_svc+0x28/0x40
  el0_svc+0x4c/0xc0
  el0t_64_sync_handler+0x84/0x100
  el0t_64_sync+0x190/0x198

Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch")
Signed-off-by: James Clark <james.clark@linaro.org>
---

I'm only mildly confident that the logic here is equivalent to before.
Someone with a bit more context about the FP stuff can say, or if there
is a neater way to fix this issue altogether.

 arch/arm64/include/asm/kvm_emulate.h | 15 +++++++++------
 arch/arm64/kvm/hyp/nvhe/switch.c     |  3 ++-
 2 files changed, 11 insertions(+), 7 deletions(-)

Comments

Marc Zyngier Nov. 12, 2024, 3:01 p.m. UTC | #1
On Tue, 12 Nov 2024 10:50:31 +0000,
James Clark <james.clark@linaro.org> wrote:
> 
> kvm_get_reset_cptr_el2() is called at vcpu init before the vcpu is
> loaded. Since the linked commit, the fp state was moved from the vcpu to
> host data but it shouldn't be accessed at this point.
> 
> Move the bits that require guest_owns_fp_regs() out of the default value
> and into just before they're used in activate and deactivate traps. This
> fixes the following bug when nvhe && vcpu_has_sve() == true:
> 
>  BUG: using smp_processor_id() in preemptible [00000000] code: lkvm/118
>  caller is debug_smp_processor_id+0x20/0x30
>  CPU: 0 UID: 0 PID: 118 Comm: lkvm Not tainted 6.12.0-rc1+ #35
>  Hardware name: FVP Base RevC (DT)
>  Call trace:
>   dump_backtrace+0xfc/0x120
>   show_stack+0x24/0x38
>   dump_stack_lvl+0x3c/0x98
>   dump_stack+0x18/0x28
>   check_preemption_disabled+0xe0/0xe8
>   debug_smp_processor_id+0x20/0x30
>   guest_owns_fp_regs+0x1c/0xb0
>   kvm_arch_vcpu_ioctl+0xcfc/0xe10
>   kvm_vcpu_ioctl+0x6c4/0x8a0
>   __arm64_sys_ioctl+0x9c/0xe0
>   invoke_syscall+0x4c/0x110
>   el0_svc_common+0xb8/0xf0
>   do_el0_svc+0x28/0x40
>   el0_svc+0x4c/0xc0
>   el0t_64_sync_handler+0x84/0x100
>   el0t_64_sync+0x190/0x198
> 
> Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch")
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> 
> I'm only mildly confident that the logic here is equivalent to before.
> Someone with a bit more context about the FP stuff can say, or if there
> is a neater way to fix this issue altogether.
> 
>  arch/arm64/include/asm/kvm_emulate.h | 15 +++++++++------
>  arch/arm64/kvm/hyp/nvhe/switch.c     |  3 ++-
>  2 files changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index cf811009a33c..0eefb9fb08a0 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -629,16 +629,12 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
>  			val |= CPACR_EL1_SMEN_EL1EN;
>  	} else if (has_hvhe()) {
>  		val = CPACR_ELx_FPEN;
> -
> -		if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
> +		if (!vcpu_has_sve(vcpu))
>  			val |= CPACR_ELx_ZEN;
>  		if (cpus_have_final_cap(ARM64_SME))
>  			val |= CPACR_ELx_SMEN;
>  	} else {
>  		val = CPTR_NVHE_EL2_RES1;
> -
> -		if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
> -			val |= CPTR_EL2_TZ;
>  		if (cpus_have_final_cap(ARM64_SME))
>  			val &= ~CPTR_EL2_TSM;
>  	}
> @@ -648,8 +644,15 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
>  
>  static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
>  {
> -	u64 val = kvm_get_reset_cptr_el2(vcpu);
> +	u64 val = vcpu->arch.cptr_el2;
>  
> +	if (has_hvhe()) {
> +		if (!guest_owns_fp_regs())
> +			val |= CPACR_ELx_ZEN;
> +	} else if (!has_vhe()) {
> +		if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
> +			val |= CPTR_EL2_TZ;
> +	}
>  	kvm_write_cptr_el2(val);
>  }
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index cc69106734ca..296c4155e1fc 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -60,7 +60,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>  			val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
>  
>  		__activate_traps_fpsimd32(vcpu);
> -	}
> +	} else if (!has_hvhe() && vcpu_has_sve(vcpu))
> +		val |= CPTR_EL2_TZ;
>  
>  	kvm_write_cptr_el2(val);
>  	write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);

I think this is papering over the real issue, which is that we
conflate reset value for the host and what is required for the guest
to run.

CPTR_EL2 is state-dependent, as you found out. And that really only
means one single thing: it cannot be initialised outside of the vcpu
being either loaded or run, both of which require being in a
non-preemptible section.

There is also another thing: VHE rebuilds the guest's CPTR_EL2 view
from scratch, while the nVHE takes the saved state, mutates it in
funny ways before applying it, and pKVM does all sorts of interesting
manipulations before hitting the nVHE code.

What I would really like to see is:

- when entering the guest, we recompute the run-time value of CPTR_EL2
  from scratch, just like VHE does.

- when exiting the guest, we reset the value using the current helper,
  which takes the guest state into account.

- pKVM should be converted to using the plain nVHE code.

- vcpu->arch.cptr_el2 should be killed.

I think Fuad has already started on some of that. Fuad, do you mind
adding that to your current rework and post something shortly?

Thanks,

	M.
Fuad Tabba Nov. 12, 2024, 3:11 p.m. UTC | #2
Hi Marc and James,

On Tue, 12 Nov 2024 at 15:01, Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 12 Nov 2024 10:50:31 +0000,
> James Clark <james.clark@linaro.org> wrote:
> >
> > kvm_get_reset_cptr_el2() is called at vcpu init before the vcpu is
> > loaded. Since the linked commit, the fp state was moved from the vcpu to
> > host data but it shouldn't be accessed at this point.
> >
> > Move the bits that require guest_owns_fp_regs() out of the default value
> > and into just before they're used in activate and deactivate traps. This
> > fixes the following bug when nvhe && vcpu_has_sve() == true:
> >
> >  BUG: using smp_processor_id() in preemptible [00000000] code: lkvm/118
> >  caller is debug_smp_processor_id+0x20/0x30
> >  CPU: 0 UID: 0 PID: 118 Comm: lkvm Not tainted 6.12.0-rc1+ #35
> >  Hardware name: FVP Base RevC (DT)
> >  Call trace:
> >   dump_backtrace+0xfc/0x120
> >   show_stack+0x24/0x38
> >   dump_stack_lvl+0x3c/0x98
> >   dump_stack+0x18/0x28
> >   check_preemption_disabled+0xe0/0xe8
> >   debug_smp_processor_id+0x20/0x30
> >   guest_owns_fp_regs+0x1c/0xb0
> >   kvm_arch_vcpu_ioctl+0xcfc/0xe10
> >   kvm_vcpu_ioctl+0x6c4/0x8a0
> >   __arm64_sys_ioctl+0x9c/0xe0
> >   invoke_syscall+0x4c/0x110
> >   el0_svc_common+0xb8/0xf0
> >   do_el0_svc+0x28/0x40
> >   el0_svc+0x4c/0xc0
> >   el0t_64_sync_handler+0x84/0x100
> >   el0t_64_sync+0x190/0x198
> >
> > Fixes: 5294afdbf45a ("KVM: arm64: Exclude FP ownership from kvm_vcpu_arch")
> > Signed-off-by: James Clark <james.clark@linaro.org>
> > ---
> >
> > I'm only mildly confident that the logic here is equivalent to before.
> > Someone with a bit more context about the FP stuff can say, or if there
> > is a neater way to fix this issue altogether.
> >
> >  arch/arm64/include/asm/kvm_emulate.h | 15 +++++++++------
> >  arch/arm64/kvm/hyp/nvhe/switch.c     |  3 ++-
> >  2 files changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > index cf811009a33c..0eefb9fb08a0 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -629,16 +629,12 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
> >                       val |= CPACR_EL1_SMEN_EL1EN;
> >       } else if (has_hvhe()) {
> >               val = CPACR_ELx_FPEN;
> > -
> > -             if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
> > +             if (!vcpu_has_sve(vcpu))
> >                       val |= CPACR_ELx_ZEN;
> >               if (cpus_have_final_cap(ARM64_SME))
> >                       val |= CPACR_ELx_SMEN;
> >       } else {
> >               val = CPTR_NVHE_EL2_RES1;
> > -
> > -             if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
> > -                     val |= CPTR_EL2_TZ;
> >               if (cpus_have_final_cap(ARM64_SME))
> >                       val &= ~CPTR_EL2_TSM;
> >       }
> > @@ -648,8 +644,15 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
> >
> >  static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
> >  {
> > -     u64 val = kvm_get_reset_cptr_el2(vcpu);
> > +     u64 val = vcpu->arch.cptr_el2;
> >
> > +     if (has_hvhe()) {
> > +             if (!guest_owns_fp_regs())
> > +                     val |= CPACR_ELx_ZEN;
> > +     } else if (!has_vhe()) {
> > +             if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
> > +                     val |= CPTR_EL2_TZ;
> > +     }
> >       kvm_write_cptr_el2(val);
> >  }
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> > index cc69106734ca..296c4155e1fc 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> > @@ -60,7 +60,8 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
> >                       val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
> >
> >               __activate_traps_fpsimd32(vcpu);
> > -     }
> > +     } else if (!has_hvhe() && vcpu_has_sve(vcpu))
> > +             val |= CPTR_EL2_TZ;
> >
> >       kvm_write_cptr_el2(val);
> >       write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
>
> I think this is papering over the real issue, which is that we
> conflate reset value for the host and what is required for the guest
> to run.
>
> CPTR_EL2 is state-dependent, as you found out. And that really only
> means one single thing: it cannot be initialised outside of the vcpu
> being either loaded or run, both of which require being in a
> non-preemptible section.
>
> There is also another thing: VHE rebuilds the guest's CPTR_EL2 view
> from scratch, while the nVHE takes the saved state, mutates it in
> funny ways before applying it, and pKVM does all sorts of interesting
> manipulations before hitting the nVHE code.
>
> What I would really like to see is:
>
> - when entering the guest, we recompute the run-time value of CPTR_EL2
>   from scratch, just like VHE does.
>
> - when exiting the guest, we reset the value using the current helper,
>   which takes the guest state into account.
>
> - pKVM should be converted to using the plain nVHE code.
>
> - vcpu->arch.cptr_el2 should be killed.
>
> I think Fuad has already started on some of that. Fuad, do you mind
> adding that to your current rework and post something shortly?

Yes, this is a bit of a mess currently. I've been reworking it as part
of the work on pKVM. I'll post something shortly that will make it
more inline with how VHE handles this.

Cheers,
/fuad

> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index cf811009a33c..0eefb9fb08a0 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -629,16 +629,12 @@  static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 			val |= CPACR_EL1_SMEN_EL1EN;
 	} else if (has_hvhe()) {
 		val = CPACR_ELx_FPEN;
-
-		if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
+		if (!vcpu_has_sve(vcpu))
 			val |= CPACR_ELx_ZEN;
 		if (cpus_have_final_cap(ARM64_SME))
 			val |= CPACR_ELx_SMEN;
 	} else {
 		val = CPTR_NVHE_EL2_RES1;
-
-		if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
-			val |= CPTR_EL2_TZ;
 		if (cpus_have_final_cap(ARM64_SME))
 			val &= ~CPTR_EL2_TSM;
 	}
@@ -648,8 +644,15 @@  static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 
 static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
-	u64 val = kvm_get_reset_cptr_el2(vcpu);
+	u64 val = vcpu->arch.cptr_el2;
 
+	if (has_hvhe()) {
+		if (!guest_owns_fp_regs())
+			val |= CPACR_ELx_ZEN;
+	} else if (!has_vhe()) {
+		if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
+			val |= CPTR_EL2_TZ;
+	}
 	kvm_write_cptr_el2(val);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index cc69106734ca..296c4155e1fc 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -60,7 +60,8 @@  static void __activate_traps(struct kvm_vcpu *vcpu)
 			val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
 
 		__activate_traps_fpsimd32(vcpu);
-	}
+	} else if (!has_hvhe() && vcpu_has_sve(vcpu))
+		val |= CPTR_EL2_TZ;
 
 	kvm_write_cptr_el2(val);
 	write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);