
[07/21] KVM: TDX: Add load_mmu_pgd method for TDX

Message ID 20240904030751.117579-8-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Series TDX MMU Part 2

Commit Message

Edgecombe, Rick P Sept. 4, 2024, 3:07 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

TDX uses two EPT pointers, one for the private half of the GPA space and
one for the shared half. The private half uses the normal EPT_POINTER VMCS
field, which is managed in a special way by the TDX module; KVM is not
allowed to operate on it directly. The shared half uses a new
SHARED_EPT_POINTER field and will be managed by the conventional MMU
management operations that operate directly on the EPT root. This means
that for TDX the .load_mmu_pgd() operation will need to know to use the
SHARED_EPT_POINTER field instead of the normal one. Add a new wrapper in
x86 ops for load_mmu_pgd() that directs the write to either the existing
VMX implementation or a new TDX one.

tdx_load_mmu_pgd() is much simpler than vmx_load_mmu_pgd() because in the
TDX mode of operation EPT is always used and KVM does not need to be
involved in virtualizing CR3 behavior, so tdx_load_mmu_pgd() can simply
write to SHARED_EPT_POINTER.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
TDX MMU part 2 v1:
- update the commit msg with the version rephrased by Rick.
  https://lore.kernel.org/all/78b1024ec3f5868e228baf797c6be98c5397bd49.camel@intel.com/

v19:
- Add WARN_ON_ONCE() to tdx_load_mmu_pgd() and drop unconditional mask
---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kvm/vmx/main.c    | 13 ++++++++++++-
 arch/x86/kvm/vmx/tdx.c     |  5 +++++
 arch/x86/kvm/vmx/x86_ops.h |  4 ++++
 4 files changed, 22 insertions(+), 1 deletion(-)
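
[Editor's note] As background for "managed in a special way by the TDX
module": a TD's VMCS is owned by the TDX module, so KVM cannot VMWRITE
fields such as SHARED_EPT_POINTER directly. The td_vmcs_write64() helper
used by this patch is therefore expected to route the write through the
TDH.VP.WR SEAMCALL rather than a raw VMWRITE. A minimal sketch of that
idea, assuming a tdh_vp_wr() wrapper and a TDVPS_VMCS() field-class
encoding (both stand-ins, not part of this patch):

	/*
	 * Sketch only: a TD's VMCS lives in TDX-module-protected memory,
	 * so field writes are requests to the module, not direct VMWRITEs.
	 */
	static __always_inline void td_vmcs_write64(struct vcpu_tdx *tdx,
						    u32 field, u64 value)
	{
		/* Mask selects which bits of the 64-bit field to update. */
		tdh_vp_wr(tdx, TDVPS_VMCS(field), value, GENMASK_ULL(63, 0));
	}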

Comments

Chao Gao Sept. 11, 2024, 2:48 a.m. UTC | #1
On Tue, Sep 03, 2024 at 08:07:37PM -0700, Rick Edgecombe wrote:
[...]
>diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>index 2ef95c84ee5b..8f43977ef4c6 100644
>--- a/arch/x86/kvm/vmx/tdx.c
>+++ b/arch/x86/kvm/vmx/tdx.c
>@@ -428,6 +428,11 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> 	 */
> }
> 
>+void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
>+{

pgd_level isn't used. So, I think we can either drop it or assert that it matches
the secure EPT level.

>+	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
>+}
Edgecombe, Rick P Sept. 11, 2024, 2:49 a.m. UTC | #2
On Wed, 2024-09-11 at 10:48 +0800, Chao Gao wrote:
> > index 2ef95c84ee5b..8f43977ef4c6 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -428,6 +428,11 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> >          */
> > }
> > 
> > +void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
> > +{
> 
> pgd_level isn't used. So, I think we can either drop it or assert that it
> matches the secure EPT level.

Oh, yea. Good point.
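
[Editor's note] For reference, a rough sketch of what that suggested
assertion could look like, where tdx_sept_level() is a hypothetical
stand-in for however the secure EPT level configured for the TD would
be looked up:

	void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
	{
		/*
		 * The shared EPT root built by KVM should sit at the same
		 * level as the secure EPT managed by the TDX module.
		 */
		WARN_ON_ONCE(pgd_level != tdx_sept_level(to_kvm_tdx(vcpu->kvm)));

		td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
	}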

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d77a31039f24..3e003183a4f7 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -237,6 +237,7 @@  enum vmcs_field {
 	TSC_MULTIPLIER_HIGH             = 0x00002033,
 	TERTIARY_VM_EXEC_CONTROL	= 0x00002034,
 	TERTIARY_VM_EXEC_CONTROL_HIGH	= 0x00002035,
+	SHARED_EPT_POINTER		= 0x0000203C,
 	PID_POINTER_TABLE		= 0x00002042,
 	PID_POINTER_TABLE_HIGH		= 0x00002043,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d63685ea95ce..c9dfa3aa866c 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -100,6 +100,17 @@  static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_vcpu_reset(vcpu, init_event);
 }
 
+static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
+			int pgd_level)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
+		return;
+	}
+
+	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
+}
+
 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
 	if (!is_td(kvm))
@@ -229,7 +240,7 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.write_tsc_multiplier = vmx_write_tsc_multiplier,
 
-	.load_mmu_pgd = vmx_load_mmu_pgd,
+	.load_mmu_pgd = vt_load_mmu_pgd,
 
 	.check_intercept = vmx_check_intercept,
 	.handle_exit_irqoff = vmx_handle_exit_irqoff,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 2ef95c84ee5b..8f43977ef4c6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -428,6 +428,11 @@  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	 */
 }
 
+void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
+{
+	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index debc6877729a..dcf2b36efbb9 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -130,6 +130,8 @@  void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
+
+void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
 #else
 static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; }
 static inline void tdx_mmu_release_hkid(struct kvm *kvm) {}
@@ -142,6 +144,8 @@  static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
+
+static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
 #endif
 
 #endif /* __KVM_X86_VMX_X86_OPS_H */