[13/21] KVM: TDX: Handle TLB tracking for TDX

Message ID	20240904030751.117579-14-rick.p.edgecombe@intel.com (mailing list archive)
State	New
Headers	show Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.18]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 11EB457C8D; Wed, 4 Sep 2024 03:14:39 +0000 (UTC) From: Rick Edgecombe <rick.p.edgecombe@intel.com> To: seanjc@google.com, pbonzini@redhat.com, kvm@vger.kernel.org Cc: kai.huang@intel.com, dmatlack@google.com, isaku.yamahata@gmail.com, yan.y.zhao@intel.com, nik.borisov@suse.com, rick.p.edgecombe@intel.com, linux-kernel@vger.kernel.org Subject: [PATCH 13/21] KVM: TDX: Handle TLB tracking for TDX Date: Tue, 3 Sep 2024 20:07:43 -0700 Message-Id: <20240904030751.117579-14-rick.p.edgecombe@intel.com> In-Reply-To: <20240904030751.117579-1-rick.p.edgecombe@intel.com> References: <20240904030751.117579-1-rick.p.edgecombe@intel.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	TDX MMU Part 2 \| expand [00/21] TDX MMU Part 2 [01/21] KVM: x86/mmu: Implement memslot deletion for TDX [02/21] KVM: x86/tdp_mmu: Add a helper function to walk down the TDP MMU [03/21] KVM: x86/mmu: Do not enable page track for TD guest [04/21] KVM: VMX: Split out guts of EPT violation to common/exposed function [05/21] KVM: VMX: Teach EPT violation helper about private mem [06/21] KVM: TDX: Add accessors VMX VMCS helpers [07/21] KVM: TDX: Add load_mmu_pgd method for TDX [08/21] KVM: TDX: Set gfn_direct_bits to shared bit [09/21] KVM: TDX: Retry seamcall when TDX_OPERAND_BUSY with operand SEPT [10/21] KVM: TDX: Require TDP MMU and mmio caching for TDX [11/21] KVM: x86/mmu: Add setter for shadow_mmio_value [12/21] KVM: TDX: Set per-VM shadow_mmio_value to 0 [13/21] KVM: TDX: Handle TLB tracking for TDX [14/21] KVM: TDX: Implement hooks to propagate changes of TDP MMU mirror page table [15/21] KVM: TDX: Implement hook to get max mapping level of private pages [16/21] KVM: TDX: Premap initial guest memory [17/21] KVM: TDX: MTRR: implement get_mt_mask() for TDX [18/21] KVM: x86/mmu: Export kvm_tdp_map_page() [19/21] KVM: TDX: Add an ioctl to create initial guest memory [20/21] KVM: TDX: Finalize VM initialization [21/21] KVM: TDX: Handle vCPU dissociation

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 2cc29d0fc279..1c86849680a3 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -101,6 +101,50 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx_vcpu_reset(vcpu, init_event); } +static void vt_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + /* + * TDX calls tdx_track() in tdx_sept_remove_private_spte() to ensure the + * private EPT is flushed on the next TD enter, so there is no need to + * call tdx_track() again here, even when this callback is invoked as a + * result of zapping the private EPT. + * Just invoke INVEPT directly (via ept_sync_global()) so that the flush + * works for both the shared EPT and the private EPT. + */ + if (is_td_vcpu(vcpu)) { + ept_sync_global(); + return; + } + + vmx_flush_tlb_all(vcpu); +} + +static void vt_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + if (is_td_vcpu(vcpu)) { + tdx_flush_tlb_current(vcpu); + return; + } + + vmx_flush_tlb_current(vcpu); +} + +static void vt_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +{ + if (is_td_vcpu(vcpu)) + return; /* No-op for TD vCPUs; only other vCPUs reach vmx_flush_tlb_gva(). */ + + vmx_flush_tlb_gva(vcpu, addr); +} + +static void vt_flush_tlb_guest(struct kvm_vcpu *vcpu) +{ + if (is_td_vcpu(vcpu)) + return; /* No-op for TD vCPUs; only other vCPUs reach vmx_flush_tlb_guest(). */ + + vmx_flush_tlb_guest(vcpu); +} + static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level) { @@ -190,10 +234,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .set_rflags = vmx_set_rflags, .get_if_flag = vmx_get_if_flag, - .flush_tlb_all = vmx_flush_tlb_all, - .flush_tlb_current = vmx_flush_tlb_current, - .flush_tlb_gva = vmx_flush_tlb_gva, - .flush_tlb_guest = vmx_flush_tlb_guest, + .flush_tlb_all = vt_flush_tlb_all, + .flush_tlb_current = vt_flush_tlb_current, + .flush_tlb_gva = vt_flush_tlb_gva, + .flush_tlb_guest = vt_flush_tlb_guest, .vcpu_pre_run = vmx_vcpu_pre_run, .vcpu_run = vmx_vcpu_run, diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 9da71782660f..6feb3ab96926 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -6,6 +6,7 @@ #include "mmu.h" 
#include "tdx.h" #include "tdx_ops.h" +#include "vmx.h" #include "mmu/spte.h" #undef pr_fmt @@ -446,6 +447,51 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level) td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa); } +/* + * Ensure that shared and private EPTs are flushed on all vCPUs. + * tdh_mem_track() is the only caller that increases the TD epoch. An increase + * of the TD epoch (e.g., to value "N + 1") succeeds only if no vCPUs are + * running in guest mode with the value "N - 1". + * + * A successful execution of tdh_mem_track() ensures that vCPUs can only run in + * guest mode with TD epoch value "N" if no TD exit occurs after the TD epoch + * has been increased to "N + 1". + * + * Kicking all vCPUs after that further ensures that no vCPU can run in guest + * mode with TD epoch value "N", which unblocks the next tdh_mem_track() (e.g. + * to increase the TD epoch to "N + 2"). + * + * The TDX module will flush the EPT on the next TD enter and make vCPUs run in + * guest mode with TD epoch value "N + 1". + * + * kvm_make_all_cpus_request() guarantees that all vCPUs are out of guest mode + * by waiting for the empty IPI handler ack_kick(). + * + * No further action is required for the kicked vCPUs, since the kick is + * guaranteed to happen after the TD epoch increment and before the next + * tdh_mem_track(). + */ +static void __always_unused tdx_track(struct kvm *kvm) +{ + struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); + u64 err; + + /* If the TD isn't finalized, no vCPU has run yet. 
*/ + if (unlikely(!is_td_finalized(kvm_tdx))) + return; + + lockdep_assert_held_write(&kvm->mmu_lock); + + do { + err = tdh_mem_track(kvm_tdx); + } while (unlikely((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY)); /* Retry for as long as the status is TDX_OPERAND_BUSY. */ + + if (KVM_BUG_ON(err, kvm)) + pr_tdx_error(TDH_MEM_TRACK, err); + + kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE); +} + static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd) { const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf; @@ -947,6 +993,15 @@ static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd) return ret; } +void tdx_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + /* + * flush_tlb_current() is used only the first time the vCPU runs. Since + * it isn't performance critical, keep this function simple. + */ + ept_sync_global(); +} + int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_tdx_cmd tdx_cmd; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index dcf2b36efbb9..28fda93f0b27 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -131,6 +131,7 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); +void tdx_flush_tlb_current(struct kvm_vcpu *vcpu); void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); #else static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; } @@ -145,6 +146,7 @@ static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {} static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; } +static inline void tdx_flush_tlb_current(struct kvm_vcpu *vcpu) {} static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {} #endif

[13/21] KVM: TDX: Handle TLB tracking for TDX

Commit Message

Comments

Patch