diff mbox series

[v3,3/9] KVM: x86: MMU: Rename get_cr3() --> get_pgd() and clear high bits for pgd

Message ID 20221209044557.1496580-4-robert.hu@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Linear Address Masking (LAM) KVM Enabling | expand

Commit Message

Robert Hoo Dec. 9, 2022, 4:45 a.m. UTC
get_cr3() is the implementation of kvm_mmu::get_guest_pgd(). However, CR3
is not naturally equivalent to the pgd: the SDM says the CR3 high bits are
reserved and must be zero, and now, with the introduction of the LAM
feature, bits 61 and 62 are used.
So, rename get_cr3() --> get_pgd() to better indicate the function's
purpose, and filter out the CR3 high bits in it.

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
---
 arch/x86/include/asm/processor-flags.h |  1 +
 arch/x86/kvm/mmu/mmu.c                 | 12 ++++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

Comments

Yuan Yao Dec. 19, 2022, 6:44 a.m. UTC | #1
On Fri, Dec 09, 2022 at 12:45:51PM +0800, Robert Hoo wrote:
> The get_cr3() is the implementation of kvm_mmu::get_guest_pgd(), well, CR3
> cannot be naturally equivalent to pgd, SDM says CR3 high bits are reserved,
> must be zero.
> And now, with LAM feature's introduction, bit 61 ~ 62 are used.
> So, rename get_cr3() --> get_pgd() to better indicate function purpose and
> in it, filtered out CR3 high bits.
>
> Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>  arch/x86/include/asm/processor-flags.h |  1 +
>  arch/x86/kvm/mmu/mmu.c                 | 12 ++++++++----
>  2 files changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
> index d8cccadc83a6..bb0f8dd16956 100644
> --- a/arch/x86/include/asm/processor-flags.h
> +++ b/arch/x86/include/asm/processor-flags.h
> @@ -38,6 +38,7 @@
>  #ifdef CONFIG_X86_64
>  /* Mask off the address space ID and SME encryption bits. */
>  #define CR3_ADDR_MASK	__sme_clr(PHYSICAL_PAGE_MASK)
> +#define CR3_HIGH_RSVD_MASK	GENMASK_ULL(63, 52)
>  #define CR3_PCID_MASK	0xFFFull
>  #define CR3_NOFLUSH	BIT_ULL(63)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index b6f96d47e596..d433c8923b18 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4488,9 +4488,13 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
>
> -static unsigned long get_cr3(struct kvm_vcpu *vcpu)
> +static unsigned long get_pgd(struct kvm_vcpu *vcpu)
>  {
> +#ifdef CONFIG_X86_64
> +	return kvm_read_cr3(vcpu) & ~CR3_HIGH_RSVD_MASK;

CR3_HIGH_RSVD_MASK is used to extract the guest pgd, so it may
need to use the guest's MAXPHYADDR rather than hard-coding 52.
Or, more simply, just mask out the LAM bits.

> +#else
>  	return kvm_read_cr3(vcpu);
> +#endif
>  }
>
>  static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
> @@ -5043,7 +5047,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
>  	context->page_fault = kvm_tdp_page_fault;
>  	context->sync_page = nonpaging_sync_page;
>  	context->invlpg = NULL;
> -	context->get_guest_pgd = get_cr3;
> +	context->get_guest_pgd = get_pgd;
>  	context->get_pdptr = kvm_pdptr_read;
>  	context->inject_page_fault = kvm_inject_page_fault;
>
> @@ -5193,7 +5197,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
>
>  	kvm_init_shadow_mmu(vcpu, cpu_role);
>
> -	context->get_guest_pgd     = get_cr3;
> +	context->get_guest_pgd     = get_pgd;
>  	context->get_pdptr         = kvm_pdptr_read;
>  	context->inject_page_fault = kvm_inject_page_fault;
>  }
> @@ -5207,7 +5211,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
>  		return;
>
>  	g_context->cpu_role.as_u64   = new_mode.as_u64;
> -	g_context->get_guest_pgd     = get_cr3;
> +	g_context->get_guest_pgd     = get_pgd;
>  	g_context->get_pdptr         = kvm_pdptr_read;
>  	g_context->inject_page_fault = kvm_inject_page_fault;
>
> --
> 2.31.1
>
Robert Hoo Dec. 20, 2022, 2:07 p.m. UTC | #2
On Mon, 2022-12-19 at 14:44 +0800, Yuan Yao wrote:
> On Fri, Dec 09, 2022 at 12:45:51PM +0800, Robert Hoo wrote:
> > The get_cr3() is the implementation of kvm_mmu::get_guest_pgd(),
> > well, CR3
> > cannot be naturally equivalent to pgd, SDM says CR3 high bits are
> > reserved,
> > must be zero.
> > And now, with LAM feature's introduction, bit 61 ~ 62 are used.
> > So, rename get_cr3() --> get_pgd() to better indicate function
> > purpose and
> > in it, filtered out CR3 high bits.
> > 
> > Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
> > Reviewed-by: Jingqi Liu <jingqi.liu@intel.com>
> > ---
> >  arch/x86/include/asm/processor-flags.h |  1 +
> >  arch/x86/kvm/mmu/mmu.c                 | 12 ++++++++----
> >  2 files changed, 9 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/processor-flags.h
> > b/arch/x86/include/asm/processor-flags.h
> > index d8cccadc83a6..bb0f8dd16956 100644
> > --- a/arch/x86/include/asm/processor-flags.h
> > +++ b/arch/x86/include/asm/processor-flags.h
> > @@ -38,6 +38,7 @@
> >  #ifdef CONFIG_X86_64
> >  /* Mask off the address space ID and SME encryption bits. */
> >  #define CR3_ADDR_MASK	__sme_clr(PHYSICAL_PAGE_MASK)
> > +#define CR3_HIGH_RSVD_MASK	GENMASK_ULL(63, 52)
> >  #define CR3_PCID_MASK	0xFFFull
> >  #define CR3_NOFLUSH	BIT_ULL(63)
> > 
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index b6f96d47e596..d433c8923b18 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4488,9 +4488,13 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu,
> > gpa_t new_pgd)
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
> > 
> > -static unsigned long get_cr3(struct kvm_vcpu *vcpu)
> > +static unsigned long get_pgd(struct kvm_vcpu *vcpu)
> >  {
> > +#ifdef CONFIG_X86_64
> > +	return kvm_read_cr3(vcpu) & ~CR3_HIGH_RSVD_MASK;
> 
> CR3_HIGH_RSVD_MASK is used to extract the guest pgd, may
> need to use guest's MAXPHYADDR but not hard code to 52.
> Or easily, just mask out the LAM bits.
> 
I defined CR3_HIGH_RSVD_MASK to extract possible feature control bits in
[63, 52]; we already have the LAM bits (bits 61 and 62) and
PCID_NO_FLUSHING (bit 63) as examples. These bits, along with possible
future new ones, will not go below bit 52, since 52 is the maximum
MAXPHYADDR defined by the current SDM. As for [51, guest actual
max_phy_addr], I think other code should guarantee that those bits are
reserved as 0, to conform to the SDM. Given this, I chose the conservative
constant for simplicity.

However, your point also makes sense: since this function is get_pgd(),
literally returning kvm_read_cr3(vcpu) & ~vcpu->arch.reserved_gpa_bits is
more correct. I'll adopt this in the next version. Thanks.
Sean Christopherson Jan. 7, 2023, 12:45 a.m. UTC | #3
On Fri, Dec 09, 2022, Robert Hoo wrote:
> The get_cr3() is the implementation of kvm_mmu::get_guest_pgd(), well, CR3
> cannot be naturally equivalent to pgd, SDM says CR3 high bits are reserved,
> must be zero.
> And now, with LAM feature's introduction, bit 61 ~ 62 are used.
> So, rename get_cr3() --> get_pgd() to better indicate function purpose and
> in it, filtered out CR3 high bits.

Depends on one's interpretation of "PGD".  KVM says it's the full thing, e.g. the
nEPT hook returns the full EPTP, not EP4TA (or EP5TA).  I don't think stripping
bits in get_cr3() is the right approach, e.g. the user might want the full thing
for comparison.  E.g. the PCID bits are left as is.

Changing get_cr3() but not nested_svm_get_tdp_cr3() and nested_ept_get_eptp() is
also weird.

I think my preference would be to strip the LAM bits in the few places that want
the physical address and keep get_cr3() as is.
Robert Hoo Jan. 7, 2023, 1:36 p.m. UTC | #4
On Sat, 2023-01-07 at 00:45 +0000, Sean Christopherson wrote:
> On Fri, Dec 09, 2022, Robert Hoo wrote:
> > The get_cr3() is the implementation of kvm_mmu::get_guest_pgd(),
> > well, CR3
> > cannot be naturally equivalent to pgd, SDM says CR3 high bits are
> > reserved,
> > must be zero.
> > And now, with LAM feature's introduction, bit 61 ~ 62 are used.
> > So, rename get_cr3() --> get_pgd() to better indicate function
> > purpose and
> > in it, filtered out CR3 high bits.
> 
> Depends on one's interpreation of "PGD".  KVM says it's the full
> thing, e.g. the
> nEPT hook returns the full EPTP, not EP4TA (or EP5TA).  I don't think
> stripping
> bits in get_cr3() is the right approach, e.g. the user might want the
> full thing
> for comparison.  E.g. the PCID bits are left as is.
> 
> Changing get_cr3() but not nested_svm_get_tdp_cr3() and
> nested_ept_get_eptp() is
> also weird.
> 
> I think my preference would be to strip the LAM bits in the few
> places that want
> the physical address and keep get_cr3() as is.

OK, I will do it this way in the next version. Thanks.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index d8cccadc83a6..bb0f8dd16956 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,7 @@ 
 #ifdef CONFIG_X86_64
 /* Mask off the address space ID and SME encryption bits. */
 #define CR3_ADDR_MASK	__sme_clr(PHYSICAL_PAGE_MASK)
+#define CR3_HIGH_RSVD_MASK	GENMASK_ULL(63, 52)
 #define CR3_PCID_MASK	0xFFFull
 #define CR3_NOFLUSH	BIT_ULL(63)
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b6f96d47e596..d433c8923b18 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4488,9 +4488,13 @@  void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
 
-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
+static unsigned long get_pgd(struct kvm_vcpu *vcpu)
 {
+#ifdef CONFIG_X86_64
+	return kvm_read_cr3(vcpu) & ~CR3_HIGH_RSVD_MASK;
+#else
 	return kvm_read_cr3(vcpu);
+#endif
 }
 
 static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -5043,7 +5047,7 @@  static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
 	context->page_fault = kvm_tdp_page_fault;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = NULL;
-	context->get_guest_pgd = get_cr3;
+	context->get_guest_pgd = get_pgd;
 	context->get_pdptr = kvm_pdptr_read;
 	context->inject_page_fault = kvm_inject_page_fault;
 
@@ -5193,7 +5197,7 @@  static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
 
 	kvm_init_shadow_mmu(vcpu, cpu_role);
 
-	context->get_guest_pgd     = get_cr3;
+	context->get_guest_pgd     = get_pgd;
 	context->get_pdptr         = kvm_pdptr_read;
 	context->inject_page_fault = kvm_inject_page_fault;
 }
@@ -5207,7 +5211,7 @@  static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
 		return;
 
 	g_context->cpu_role.as_u64   = new_mode.as_u64;
-	g_context->get_guest_pgd     = get_cr3;
+	g_context->get_guest_pgd     = get_pgd;
 	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;