diff mbox series

[RFC,v7,44/64] KVM: SVM: Remove the long-lived GHCB host map

Message ID 20221214194056.161492-45-michael.roth@amd.com (mailing list archive)
State New
Headers show
Series Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand

Commit Message

Michael Roth Dec. 14, 2022, 7:40 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

On VMGEXIT, sev_handle_vmgexit() creates a host mapping for the GHCB GPA,
and unmaps it just before VM-entry. This long-lived GHCB map is used by
the VMGEXIT handler through accessors such as ghcb_{set_get}_xxx().

A long-lived GHCB map can cause issue when SEV-SNP is enabled. When
SEV-SNP is enabled the mapped GPA needs to be protected against a page
state change.

To eliminate the long-lived GHCB mapping, update the GHCB sync operations
to explicitly map the GHCB before access and unmap it after access is
complete. This requires that the setting of the GHCBs sw_exit_info_{1,2}
fields be done during sev_es_sync_to_ghcb(), so create two new fields in
the vcpu_svm struct to hold these values when required to be set outside
of the GHCB mapping.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
[mdr: defer per_cpu() assignment and order it with barrier() to fix case
      where kvm_vcpu_map() causes reschedule on different CPU]
Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 arch/x86/kvm/svm/sev.c | 131 ++++++++++++++++++++++++++---------------
 arch/x86/kvm/svm/svm.c |  18 +++---
 arch/x86/kvm/svm/svm.h |  24 +++++++-
 3 files changed, 116 insertions(+), 57 deletions(-)

Comments

Jeremi Piotrowski Jan. 18, 2023, 3:27 p.m. UTC | #1
On Wed, Dec 14, 2022 at 01:40:36PM -0600, Michael Roth wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> On VMGEXIT, sev_handle_vmgexit() creates a host mapping for the GHCB GPA,
> and unmaps it just before VM-entry. This long-lived GHCB map is used by
> the VMGEXIT handler through accessors such as ghcb_{set_get}_xxx().
> 
> A long-lived GHCB map can cause issue when SEV-SNP is enabled. When
> SEV-SNP is enabled the mapped GPA needs to be protected against a page
> state change.
> 
> To eliminate the long-lived GHCB mapping, update the GHCB sync operations
> to explicitly map the GHCB before access and unmap it after access is
> complete. This requires that the setting of the GHCBs sw_exit_info_{1,2}
> fields be done during sev_es_sync_to_ghcb(), so create two new fields in
> the vcpu_svm struct to hold these values when required to be set outside
> of the GHCB mapping.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> [mdr: defer per_cpu() assignment and order it with barrier() to fix case
>       where kvm_vcpu_map() causes reschedule on different CPU]
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> ---
>  arch/x86/kvm/svm/sev.c | 131 ++++++++++++++++++++++++++---------------
>  arch/x86/kvm/svm/svm.c |  18 +++---
>  arch/x86/kvm/svm/svm.h |  24 +++++++-
>  3 files changed, 116 insertions(+), 57 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index d5c6e48055fb..6ac0cb6e3484 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -2921,15 +2921,40 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
>  	kvfree(svm->sev_es.ghcb_sa);
>  }
>  
> +static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
> +{
> +	struct vmcb_control_area *control = &svm->vmcb->control;
> +	u64 gfn = gpa_to_gfn(control->ghcb_gpa);
> +
> +	if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
> +		/* Unable to map GHCB from guest */
> +		pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
> +		return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
> +{
> +	kvm_vcpu_unmap(&svm->vcpu, map, true);
> +}
> +
>  static void dump_ghcb(struct vcpu_svm *svm)
>  {
> -	struct ghcb *ghcb = svm->sev_es.ghcb;
> +	struct kvm_host_map map;
>  	unsigned int nbits;
> +	struct ghcb *ghcb;
> +
> +	if (svm_map_ghcb(svm, &map))
> +		return;
> +
> +	ghcb = map.hva;

dump_ghcb() is called from sev_es_validate_vmgexit() with the ghcb already
mapped. How about passing 'struct kvm_host_map *' (or struct ghcb *) as a
param to avoid double mapping?

>  
>  	/* Re-use the dump_invalid_vmcb module parameter */
>  	if (!dump_invalid_vmcb) {
>  		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
> -		return;
> +		goto e_unmap;
>  	}
>  
>  	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
> @@ -2944,12 +2969,21 @@ static void dump_ghcb(struct vcpu_svm *svm)
>  	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
>  	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
>  	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
> +
> +e_unmap:
> +	svm_unmap_ghcb(svm, &map);
>  }
>  
> -static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
> +static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
>  {
>  	struct kvm_vcpu *vcpu = &svm->vcpu;
> -	struct ghcb *ghcb = svm->sev_es.ghcb;
> +	struct kvm_host_map map;
> +	struct ghcb *ghcb;
> +
> +	if (svm_map_ghcb(svm, &map))
> +		return false;
> +
> +	ghcb = map.hva;
>  
>  	/*
>  	 * The GHCB protocol so far allows for the following data
> @@ -2963,13 +2997,24 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
>  	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
>  	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
>  	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
> +
> +	/*
> +	 * Copy the return values from the exit_info_{1,2}.
> +	 */
> +	ghcb_set_sw_exit_info_1(ghcb, svm->sev_es.ghcb_sw_exit_info_1);
> +	ghcb_set_sw_exit_info_2(ghcb, svm->sev_es.ghcb_sw_exit_info_2);
> +
> +	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
> +
> +	svm_unmap_ghcb(svm, &map);
> +
> +	return true;
>  }
>  
> -static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
> +static void sev_es_sync_from_ghcb(struct vcpu_svm *svm, struct ghcb *ghcb)
>  {
>  	struct vmcb_control_area *control = &svm->vmcb->control;
>  	struct kvm_vcpu *vcpu = &svm->vcpu;
> -	struct ghcb *ghcb = svm->sev_es.ghcb;
>  	u64 exit_code;
>  
>  	/*
> @@ -3013,20 +3058,25 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
>  	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
>  }
>  
> -static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
> +static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
>  {
> -	struct kvm_vcpu *vcpu;
> +	struct kvm_vcpu *vcpu = &svm->vcpu;
> +	struct kvm_host_map map;
>  	struct ghcb *ghcb;
> -	u64 exit_code;
>  	u64 reason;
>  
> -	ghcb = svm->sev_es.ghcb;
> +	if (svm_map_ghcb(svm, &map))
> +		return -EFAULT;
> +
> +	ghcb = map.hva;
> +
> +	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
>  
>  	/*
>  	 * Retrieve the exit code now even though it may not be marked valid
>  	 * as it could help with debugging.
>  	 */
> -	exit_code = ghcb_get_sw_exit_code(ghcb);
> +	*exit_code = ghcb_get_sw_exit_code(ghcb);
>  
>  	/* Only GHCB Usage code 0 is supported */
>  	if (ghcb->ghcb_usage) {
> @@ -3119,6 +3169,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
>  		goto vmgexit_err;
>  	}
>  
> +	sev_es_sync_from_ghcb(svm, ghcb);
> +
> +	svm_unmap_ghcb(svm, &map);
>  	return 0;
>  
>  vmgexit_err:
> @@ -3129,10 +3182,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
>  			    ghcb->ghcb_usage);
>  	} else if (reason == GHCB_ERR_INVALID_EVENT) {
>  		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
> -			    exit_code);
> +			    *exit_code);
>  	} else {
>  		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
> -			    exit_code);
> +			    *exit_code);
>  		dump_ghcb(svm);
>  	}
>  
> @@ -3142,6 +3195,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
>  	ghcb_set_sw_exit_info_1(ghcb, 2);
>  	ghcb_set_sw_exit_info_2(ghcb, reason);
>  
> +	svm_unmap_ghcb(svm, &map);
> +
>  	/* Resume the guest to "return" the error code. */
>  	return 1;
>  }
Alper Gun Jan. 18, 2023, 6:15 p.m. UTC | #2
On Wed, Jan 18, 2023 at 7:27 AM Jeremi Piotrowski
<jpiotrowski@linux.microsoft.com> wrote:
>
> On Wed, Dec 14, 2022 at 01:40:36PM -0600, Michael Roth wrote:
> > From: Brijesh Singh <brijesh.singh@amd.com>
> >
> > On VMGEXIT, sev_handle_vmgexit() creates a host mapping for the GHCB GPA,
> > and unmaps it just before VM-entry. This long-lived GHCB map is used by
> > the VMGEXIT handler through accessors such as ghcb_{set_get}_xxx().
> >
> > A long-lived GHCB map can cause issue when SEV-SNP is enabled. When
> > SEV-SNP is enabled the mapped GPA needs to be protected against a page
> > state change.
> >
> > To eliminate the long-lived GHCB mapping, update the GHCB sync operations
> > to explicitly map the GHCB before access and unmap it after access is
> > complete. This requires that the setting of the GHCBs sw_exit_info_{1,2}
> > fields be done during sev_es_sync_to_ghcb(), so create two new fields in
> > the vcpu_svm struct to hold these values when required to be set outside
> > of the GHCB mapping.
> >
> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > [mdr: defer per_cpu() assignment and order it with barrier() to fix case
> >       where kvm_vcpu_map() causes reschedule on different CPU]
> > Signed-off-by: Michael Roth <michael.roth@amd.com>
> > ---
> >  arch/x86/kvm/svm/sev.c | 131 ++++++++++++++++++++++++++---------------
> >  arch/x86/kvm/svm/svm.c |  18 +++---
> >  arch/x86/kvm/svm/svm.h |  24 +++++++-
> >  3 files changed, 116 insertions(+), 57 deletions(-)
> >
> > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > index d5c6e48055fb..6ac0cb6e3484 100644
> > --- a/arch/x86/kvm/svm/sev.c
> > +++ b/arch/x86/kvm/svm/sev.c
> > @@ -2921,15 +2921,40 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
> >       kvfree(svm->sev_es.ghcb_sa);
> >  }
> >
> > +static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
> > +{
> > +     struct vmcb_control_area *control = &svm->vmcb->control;
> > +     u64 gfn = gpa_to_gfn(control->ghcb_gpa);
> > +
> > +     if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
> > +             /* Unable to map GHCB from guest */
> > +             pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
> > +             return -EFAULT;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
> > +{
> > +     kvm_vcpu_unmap(&svm->vcpu, map, true);
> > +}
> > +
> >  static void dump_ghcb(struct vcpu_svm *svm)
> >  {
> > -     struct ghcb *ghcb = svm->sev_es.ghcb;
> > +     struct kvm_host_map map;
> >       unsigned int nbits;
> > +     struct ghcb *ghcb;
> > +
> > +     if (svm_map_ghcb(svm, &map))
> > +             return;
> > +
> > +     ghcb = map.hva;
>
> dump_ghcb() is called from sev_es_validate_vmgexit() with the ghcb already
> mapped. How about passing 'struct kvm_host_map *' (or struct ghcb *) as a
> param to avoid double mapping?

This also causes a soft lockup, PSC spin lock is already acquired in
sev_es_validate_vmgexit. dump_ghcb will try to acquire the same lock
again. So a guest can send an invalid ghcb page  and cause a host soft
lockup.

>
> >
> >       /* Re-use the dump_invalid_vmcb module parameter */
> >       if (!dump_invalid_vmcb) {
> >               pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
> > -             return;
> > +             goto e_unmap;
> >       }
> >
> >       nbits = sizeof(ghcb->save.valid_bitmap) * 8;
> > @@ -2944,12 +2969,21 @@ static void dump_ghcb(struct vcpu_svm *svm)
> >       pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
> >              ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
> >       pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
> > +
> > +e_unmap:
> > +     svm_unmap_ghcb(svm, &map);
> >  }
> >
> > -static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
> > +static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
> >  {
> >       struct kvm_vcpu *vcpu = &svm->vcpu;
> > -     struct ghcb *ghcb = svm->sev_es.ghcb;
> > +     struct kvm_host_map map;
> > +     struct ghcb *ghcb;
> > +
> > +     if (svm_map_ghcb(svm, &map))
> > +             return false;
> > +
> > +     ghcb = map.hva;
> >
> >       /*
> >        * The GHCB protocol so far allows for the following data
> > @@ -2963,13 +2997,24 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
> >       ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
> >       ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
> >       ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
> > +
> > +     /*
> > +      * Copy the return values from the exit_info_{1,2}.
> > +      */
> > +     ghcb_set_sw_exit_info_1(ghcb, svm->sev_es.ghcb_sw_exit_info_1);
> > +     ghcb_set_sw_exit_info_2(ghcb, svm->sev_es.ghcb_sw_exit_info_2);
> > +
> > +     trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
> > +
> > +     svm_unmap_ghcb(svm, &map);
> > +
> > +     return true;
> >  }
> >
> > -static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
> > +static void sev_es_sync_from_ghcb(struct vcpu_svm *svm, struct ghcb *ghcb)
> >  {
> >       struct vmcb_control_area *control = &svm->vmcb->control;
> >       struct kvm_vcpu *vcpu = &svm->vcpu;
> > -     struct ghcb *ghcb = svm->sev_es.ghcb;
> >       u64 exit_code;
> >
> >       /*
> > @@ -3013,20 +3058,25 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
> >       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
> >  }
> >
> > -static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
> > +static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
> >  {
> > -     struct kvm_vcpu *vcpu;
> > +     struct kvm_vcpu *vcpu = &svm->vcpu;
> > +     struct kvm_host_map map;
> >       struct ghcb *ghcb;
> > -     u64 exit_code;
> >       u64 reason;
> >
> > -     ghcb = svm->sev_es.ghcb;
> > +     if (svm_map_ghcb(svm, &map))
> > +             return -EFAULT;
> > +
> > +     ghcb = map.hva;
> > +
> > +     trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
> >
> >       /*
> >        * Retrieve the exit code now even though it may not be marked valid
> >        * as it could help with debugging.
> >        */
> > -     exit_code = ghcb_get_sw_exit_code(ghcb);
> > +     *exit_code = ghcb_get_sw_exit_code(ghcb);
> >
> >       /* Only GHCB Usage code 0 is supported */
> >       if (ghcb->ghcb_usage) {
> > @@ -3119,6 +3169,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
> >               goto vmgexit_err;
> >       }
> >
> > +     sev_es_sync_from_ghcb(svm, ghcb);
> > +
> > +     svm_unmap_ghcb(svm, &map);
> >       return 0;
> >
> >  vmgexit_err:
> > @@ -3129,10 +3182,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
> >                           ghcb->ghcb_usage);
> >       } else if (reason == GHCB_ERR_INVALID_EVENT) {
> >               vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
> > -                         exit_code);
> > +                         *exit_code);
> >       } else {
> >               vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
> > -                         exit_code);
> > +                         *exit_code);
> >               dump_ghcb(svm);
> >       }
> >
> > @@ -3142,6 +3195,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
> >       ghcb_set_sw_exit_info_1(ghcb, 2);
> >       ghcb_set_sw_exit_info_2(ghcb, reason);
> >
> > +     svm_unmap_ghcb(svm, &map);
> > +
> >       /* Resume the guest to "return" the error code. */
> >       return 1;
> >  }
>
Michael Roth Jan. 20, 2023, 8:10 p.m. UTC | #3
On Wed, Jan 18, 2023 at 10:15:38AM -0800, Alper Gun wrote:
> On Wed, Jan 18, 2023 at 7:27 AM Jeremi Piotrowski
> <jpiotrowski@linux.microsoft.com> wrote:
> >
> > On Wed, Dec 14, 2022 at 01:40:36PM -0600, Michael Roth wrote:
> > > From: Brijesh Singh <brijesh.singh@amd.com>
> > >
> > > On VMGEXIT, sev_handle_vmgexit() creates a host mapping for the GHCB GPA,
> > > and unmaps it just before VM-entry. This long-lived GHCB map is used by
> > > the VMGEXIT handler through accessors such as ghcb_{set_get}_xxx().
> > >
> > > A long-lived GHCB map can cause issue when SEV-SNP is enabled. When
> > > SEV-SNP is enabled the mapped GPA needs to be protected against a page
> > > state change.
> > >
> > > To eliminate the long-lived GHCB mapping, update the GHCB sync operations
> > > to explicitly map the GHCB before access and unmap it after access is
> > > complete. This requires that the setting of the GHCBs sw_exit_info_{1,2}
> > > fields be done during sev_es_sync_to_ghcb(), so create two new fields in
> > > the vcpu_svm struct to hold these values when required to be set outside
> > > of the GHCB mapping.
> > >
> > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> > > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> > > [mdr: defer per_cpu() assignment and order it with barrier() to fix case
> > >       where kvm_vcpu_map() causes reschedule on different CPU]
> > > Signed-off-by: Michael Roth <michael.roth@amd.com>
> > > ---
> > >  arch/x86/kvm/svm/sev.c | 131 ++++++++++++++++++++++++++---------------
> > >  arch/x86/kvm/svm/svm.c |  18 +++---
> > >  arch/x86/kvm/svm/svm.h |  24 +++++++-
> > >  3 files changed, 116 insertions(+), 57 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> > > index d5c6e48055fb..6ac0cb6e3484 100644
> > > --- a/arch/x86/kvm/svm/sev.c
> > > +++ b/arch/x86/kvm/svm/sev.c
> > > @@ -2921,15 +2921,40 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
> > >       kvfree(svm->sev_es.ghcb_sa);
> > >  }
> > >
> > > +static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
> > > +{
> > > +     struct vmcb_control_area *control = &svm->vmcb->control;
> > > +     u64 gfn = gpa_to_gfn(control->ghcb_gpa);
> > > +
> > > +     if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
> > > +             /* Unable to map GHCB from guest */
> > > +             pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
> > > +             return -EFAULT;
> > > +     }
> > > +
> > > +     return 0;
> > > +}
> > > +
> > > +static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
> > > +{
> > > +     kvm_vcpu_unmap(&svm->vcpu, map, true);
> > > +}
> > > +
> > >  static void dump_ghcb(struct vcpu_svm *svm)
> > >  {
> > > -     struct ghcb *ghcb = svm->sev_es.ghcb;
> > > +     struct kvm_host_map map;
> > >       unsigned int nbits;
> > > +     struct ghcb *ghcb;
> > > +
> > > +     if (svm_map_ghcb(svm, &map))
> > > +             return;
> > > +
> > > +     ghcb = map.hva;
> >
> > dump_ghcb() is called from sev_es_validate_vmgexit() with the ghcb already
> > mapped. How about passing 'struct kvm_host_map *' (or struct ghcb *) as a
> > param to avoid double mapping?
> 
> This also causes a soft lockup, PSC spin lock is already acquired in
> sev_es_validate_vmgexit. dump_ghcb will try to acquire the same lock
> again. So a guest can send an invalid ghcb page  and cause a host soft
> lockup.

We did notice that issue with v6, and had a fix similar to what Jeremi
suggested, but in this patchset the psc_lock spinlock has been replaced
in favor of taking a read_lock() on kvm->mmu_lock.

The logic there is that userspace drives the page state changes via
kvm_vm_ioctl_set_mem_attributes() now, and it does so while holding a
write_lock() on kvm->mmu_lock, so if we want to guard the GHCB page from
page state changes while the kernel is accessing it, it makes sense to
use the same lock rather than relying on some separate SNP-specific lock.

And because it's now a read_lock(), I don't think the deadlock issue is
present anymore in v7, but it probably does still make sense to avoid the
double-mapping as Jeremi suggested, so we'll plan to make that change for
v8.

Thanks,

Mike
diff mbox series

Patch

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index d5c6e48055fb..6ac0cb6e3484 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2921,15 +2921,40 @@  void sev_free_vcpu(struct kvm_vcpu *vcpu)
 	kvfree(svm->sev_es.ghcb_sa);
 }
 
+static inline int svm_map_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+{
+	struct vmcb_control_area *control = &svm->vmcb->control;
+	u64 gfn = gpa_to_gfn(control->ghcb_gpa);
+
+	if (kvm_vcpu_map(&svm->vcpu, gfn, map)) {
+		/* Unable to map GHCB from guest */
+		pr_err("error mapping GHCB GFN [%#llx] from guest\n", gfn);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static inline void svm_unmap_ghcb(struct vcpu_svm *svm, struct kvm_host_map *map)
+{
+	kvm_vcpu_unmap(&svm->vcpu, map, true);
+}
+
 static void dump_ghcb(struct vcpu_svm *svm)
 {
-	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct kvm_host_map map;
 	unsigned int nbits;
+	struct ghcb *ghcb;
+
+	if (svm_map_ghcb(svm, &map))
+		return;
+
+	ghcb = map.hva;
 
 	/* Re-use the dump_invalid_vmcb module parameter */
 	if (!dump_invalid_vmcb) {
 		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
-		return;
+		goto e_unmap;
 	}
 
 	nbits = sizeof(ghcb->save.valid_bitmap) * 8;
@@ -2944,12 +2969,21 @@  static void dump_ghcb(struct vcpu_svm *svm)
 	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
 	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
 	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+
+e_unmap:
+	svm_unmap_ghcb(svm, &map);
 }
 
-static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+static bool sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	struct ghcb *ghcb = svm->sev_es.ghcb;
+	struct kvm_host_map map;
+	struct ghcb *ghcb;
+
+	if (svm_map_ghcb(svm, &map))
+		return false;
+
+	ghcb = map.hva;
 
 	/*
 	 * The GHCB protocol so far allows for the following data
@@ -2963,13 +2997,24 @@  static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
 	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
 	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+
+	/*
+	 * Copy the return values from the exit_info_{1,2}.
+	 */
+	ghcb_set_sw_exit_info_1(ghcb, svm->sev_es.ghcb_sw_exit_info_1);
+	ghcb_set_sw_exit_info_2(ghcb, svm->sev_es.ghcb_sw_exit_info_2);
+
+	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
+
+	svm_unmap_ghcb(svm, &map);
+
+	return true;
 }
 
-static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
+static void sev_es_sync_from_ghcb(struct vcpu_svm *svm, struct ghcb *ghcb)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct kvm_vcpu *vcpu = &svm->vcpu;
-	struct ghcb *ghcb = svm->sev_es.ghcb;
 	u64 exit_code;
 
 	/*
@@ -3013,20 +3058,25 @@  static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
-static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+static int sev_es_validate_vmgexit(struct vcpu_svm *svm, u64 *exit_code)
 {
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct kvm_host_map map;
 	struct ghcb *ghcb;
-	u64 exit_code;
 	u64 reason;
 
-	ghcb = svm->sev_es.ghcb;
+	if (svm_map_ghcb(svm, &map))
+		return -EFAULT;
+
+	ghcb = map.hva;
+
+	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
 
 	/*
 	 * Retrieve the exit code now even though it may not be marked valid
 	 * as it could help with debugging.
 	 */
-	exit_code = ghcb_get_sw_exit_code(ghcb);
+	*exit_code = ghcb_get_sw_exit_code(ghcb);
 
 	/* Only GHCB Usage code 0 is supported */
 	if (ghcb->ghcb_usage) {
@@ -3119,6 +3169,9 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 		goto vmgexit_err;
 	}
 
+	sev_es_sync_from_ghcb(svm, ghcb);
+
+	svm_unmap_ghcb(svm, &map);
 	return 0;
 
 vmgexit_err:
@@ -3129,10 +3182,10 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 			    ghcb->ghcb_usage);
 	} else if (reason == GHCB_ERR_INVALID_EVENT) {
 		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
-			    exit_code);
+			    *exit_code);
 	} else {
 		vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
-			    exit_code);
+			    *exit_code);
 		dump_ghcb(svm);
 	}
 
@@ -3142,6 +3195,8 @@  static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 	ghcb_set_sw_exit_info_1(ghcb, 2);
 	ghcb_set_sw_exit_info_2(ghcb, reason);
 
+	svm_unmap_ghcb(svm, &map);
+
 	/* Resume the guest to "return" the error code. */
 	return 1;
 }
@@ -3151,23 +3206,20 @@  void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 	/* Clear any indication that the vCPU is in a type of AP Reset Hold */
 	svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE;
 
-	if (!svm->sev_es.ghcb)
+	if (!svm->sev_es.ghcb_in_use)
 		return;
 
 	 /* Sync the scratch buffer area. */
 	if (svm->sev_es.ghcb_sa_sync) {
 		kvm_write_guest(svm->vcpu.kvm,
-				ghcb_get_sw_scratch(svm->sev_es.ghcb),
+				svm->sev_es.ghcb_sa_gpa,
 				svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len);
 		svm->sev_es.ghcb_sa_sync = false;
 	}
 
-	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
-
 	sev_es_sync_to_ghcb(svm);
 
-	kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
-	svm->sev_es.ghcb = NULL;
+	svm->sev_es.ghcb_in_use = false;
 }
 
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -3197,7 +3249,6 @@  void pre_sev_run(struct vcpu_svm *svm, int cpu)
 static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
-	struct ghcb *ghcb = svm->sev_es.ghcb;
 	u64 ghcb_scratch_beg, ghcb_scratch_end;
 	u64 scratch_gpa_beg, scratch_gpa_end;
 
@@ -3276,8 +3327,8 @@  static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 	return 0;
 
 e_scratch:
-	ghcb_set_sw_exit_info_1(ghcb, 2);
-	ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+	svm_set_ghcb_sw_exit_info_1(&svm->vcpu, 2);
+	svm_set_ghcb_sw_exit_info_2(&svm->vcpu, GHCB_ERR_INVALID_SCRATCH_AREA);
 
 	return 1;
 }
@@ -3413,7 +3464,6 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	u64 ghcb_gpa, exit_code;
-	struct ghcb *ghcb;
 	int ret;
 
 	/* Validate the GHCB */
@@ -3428,29 +3478,14 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
-		/* Unable to map GHCB from guest */
-		vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
-			    ghcb_gpa);
-
-		/* Without a GHCB, just return right back to the guest */
-		return 1;
-	}
-
-	svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
-	ghcb = svm->sev_es.ghcb_map.hva;
-
-	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
-
-	exit_code = ghcb_get_sw_exit_code(ghcb);
-
-	ret = sev_es_validate_vmgexit(svm);
+	ret = sev_es_validate_vmgexit(svm, &exit_code);
 	if (ret)
 		return ret;
 
-	sev_es_sync_from_ghcb(svm);
-	ghcb_set_sw_exit_info_1(ghcb, 0);
-	ghcb_set_sw_exit_info_2(ghcb, 0);
+	svm->sev_es.ghcb_in_use = true;
+
+	svm_set_ghcb_sw_exit_info_1(vcpu, 0);
+	svm_set_ghcb_sw_exit_info_2(vcpu, 0);
 
 	switch (exit_code) {
 	case SVM_VMGEXIT_MMIO_READ:
@@ -3490,20 +3525,20 @@  int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 			break;
 		case 1:
 			/* Get AP jump table address */
-			ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+			svm_set_ghcb_sw_exit_info_2(vcpu, sev->ap_jump_table);
 			break;
 		default:
 			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
 			       control->exit_info_1);
-			ghcb_set_sw_exit_info_1(ghcb, 2);
-			ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
+			svm_set_ghcb_sw_exit_info_1(vcpu, 2);
+			svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_ERR_INVALID_INPUT);
 		}
 
 		ret = 1;
 		break;
 	}
 	case SVM_VMGEXIT_HV_FEATURES: {
-		ghcb_set_sw_exit_info_2(ghcb, GHCB_HV_FT_SUPPORTED);
+		svm_set_ghcb_sw_exit_info_2(vcpu, GHCB_HV_FT_SUPPORTED);
 
 		ret = 1;
 		break;
@@ -3651,7 +3686,7 @@  void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 		 * Return from an AP Reset Hold VMGEXIT, where the guest will
 		 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value.
 		 */
-		ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
+		svm_set_ghcb_sw_exit_info_2(vcpu, 1);
 		break;
 	case AP_RESET_HOLD_MSR_PROTO:
 		/*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2dfa150bcb09..1826946a2f43 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1445,7 +1445,7 @@  static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+	struct svm_cpu_data *sd;
 
 	if (sev_es_guest(vcpu->kvm))
 		sev_es_unmap_ghcb(svm);
@@ -1453,6 +1453,10 @@  static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	if (svm->guest_state_loaded)
 		return;
 
+	/* sev_es_unmap_ghcb() can resched, so grab per-cpu pointer afterward. */
+	barrier();
+	sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+
 	/*
 	 * Save additional host state that will be restored on VMEXIT (sev-es)
 	 * or subsequent vmload of host save area.
@@ -2818,14 +2822,14 @@  static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
+	if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb_in_use))
 		return kvm_complete_insn_gp(vcpu, err);
 
-	ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
-	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
-				X86_TRAP_GP |
-				SVM_EVTINJ_TYPE_EXEPT |
-				SVM_EVTINJ_VALID);
+	svm_set_ghcb_sw_exit_info_1(vcpu, 1);
+	svm_set_ghcb_sw_exit_info_2(vcpu,
+				    X86_TRAP_GP |
+				    SVM_EVTINJ_TYPE_EXEPT |
+				    SVM_EVTINJ_VALID);
 	return 1;
 }
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f53a41e13033..c462dfac0a0d 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -201,8 +201,7 @@  struct svm_nested_state {
 struct vcpu_sev_es_state {
 	/* SEV-ES support */
 	struct sev_es_save_area *vmsa;
-	struct ghcb *ghcb;
-	struct kvm_host_map ghcb_map;
+	bool ghcb_in_use;
 	bool received_first_sipi;
 	unsigned int ap_reset_hold_type;
 
@@ -212,6 +211,13 @@  struct vcpu_sev_es_state {
 	u64 ghcb_sa_gpa;
 	u32 ghcb_sa_alloc_len;
 	bool ghcb_sa_sync;
+
+	/*
+	 * SEV-ES support to hold the sw_exit_info return values to be
+	 * sync'ed to the GHCB when mapped.
+	 */
+	u64 ghcb_sw_exit_info_1;
+	u64 ghcb_sw_exit_info_2;
 };
 
 struct vcpu_svm {
@@ -640,6 +646,20 @@  void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
 
+static inline void svm_set_ghcb_sw_exit_info_1(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->sev_es.ghcb_sw_exit_info_1 = val;
+}
+
+static inline void svm_set_ghcb_sw_exit_info_2(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->sev_es.ghcb_sw_exit_info_2 = val;
+}
+
 extern struct kvm_x86_nested_ops svm_nested_ops;
 
 /* avic.c */