diff mbox series

[06/10] KVM: x86: Move "flush guest's TLB" logic to separate kvm_x86_ops hook

Message ID 20200220204356.8837-7-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Clean up VMX's TLB flushing code | expand

Commit Message

Sean Christopherson Feb. 20, 2020, 8:43 p.m. UTC
Add a dedicated hook to handle flushing TLB entries on behalf of the
guest, i.e. for a paravirtualized TLB flush, and use it directly instead
of bouncing through kvm_vcpu_flush_tlb().  Change the effective VMX
implementation to never do INVEPT, i.e. to always flush via INVVPID.
The INVEPT performed by __vmx_flush_tlb() when @invalidate_gpa=false and
enable_vpid=0 is unnecessary, as it will only flush GPA->HPA mappings;
GVA->GPA and GVA->HPA translations are flushed by VM-Enter when VPID is
disabled, and changes in the guest page tables only affect GVA->*PA
mappings.

When EPT and VPID are enabled, doing INVVPID is not required (by Intel's
architecture) to invalidate GPA mappings, i.e. TLB entries that cache
GPA->HPA translations can live across INVVPID as GPA->HPA mappings are
associated with an EPTP, not a VPID.  The intent of @invalidate_gpa is
to inform vmx_flush_tlb() that it needs to "invalidate gpa mappings",
i.e. do INVEPT and not simply INVVPID.  Other than nested VPID handling,
which now calls vpid_sync_context() directly, the only scenario where
KVM can safely do INVVPID instead of INVEPT (when EPT is enabled) is if
KVM is flushing TLB entries from the guest's perspective, i.e. is
invalidating GVA->GPA mappings.

Adding a dedicated ->tlb_flush_guest() paves the way toward removing
@invalidate_gpa, which is a potentially dangerous control flag as its
meaning is not exactly crystal clear, even for those who are familiar
with the subtleties of what mappings Intel CPUs are/aren't allowed to
keep across various invalidation scenarios.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  6 ++++++
 arch/x86/kvm/svm.c              |  6 ++++++
 arch/x86/kvm/vmx/vmx.c          | 13 +++++++++++++
 arch/x86/kvm/x86.c              |  2 +-
 4 files changed, 26 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini Feb. 21, 2020, 5:31 p.m. UTC | #1
On 21/02/20 14:52, Vitaly Kuznetsov wrote:
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index fbabb2f06273..72f7ca4baa6d 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2675,7 +2675,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>>  	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
>>  		st->preempted & KVM_VCPU_FLUSH_TLB);
>>  	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
>> -		kvm_vcpu_flush_tlb(vcpu, false);
>> +		kvm_x86_ops->tlb_flush_guest(vcpu);
>>  
>>  	vcpu->arch.st.preempted = 0;
> There is one additional place in hyperv.c where we do TLB flush on
> behalf of the guest, kvm_hv_flush_tlb(). Currently, it does
> KVM_REQ_TLB_FLUSH (resulting in kvm_x86_ops->tlb_flush()), do we need
> something like KVM_REQ_TLB_FLUSH_GUEST instead?

Yes, that would be better since INVEPT does not flush linear mappings.
So, when EPT and VPID are enabled, KVM_REQ_TLB_FLUSH would not flush the
guest's translations.

Paolo
Paolo Bonzini Feb. 21, 2020, 5:32 p.m. UTC | #2
On 20/02/20 21:43, Sean Christopherson wrote:
> Add a dedicated hook to handle flushing TLB entries on behalf of the
> guest, i.e. for a paravirtualized TLB flush, and use it directly instead
> of bouncing through kvm_vcpu_flush_tlb().  Change the effective VMX
> implementation to never do INVEPT, i.e. to always flush via INVVPID.
> The INVEPT performed by __vmx_flush_tlb() when @invalidate_gpa=false and
> enable_vpid=0 is unnecessary, as it will only flush GPA->HPA mappings;
> GVA->GPA and GVA->HPA translations are flushed by VM-Enter when VPID is
> disabled, and changes in the guest pages tables only affect GVA->*PA
> mappings.
> 
> When EPT and VPID are enabled, doing INVVPID is not required (by Intel's
> architecture) to invalidate GPA mappings, i.e. TLB entries that cache
> GPA->HPA translations can live across INVVPID as GPA->HPA mappings are
> associated with an EPTP, not a VPID.  The intent of @invalidate_gpa is
> to inform vmx_flush_tlb() that it needs to "invalidate gpa mappings",
> i.e. do INVEPT and not simply INVVPID.  Other than nested VPID handling,
> which now calls vpid_sync_context() directly, the only scenario where
> KVM can safely do INVVPID instead of INVEPT (when EPT is enabled) is if
> KVM is flushing TLB entries from the guest's perspective, i.e. is
> invalidating GLA->GPA mappings.

Since you need a v2, can you replace the names of the mappings with "linear",
"guest-physical" and "combined" as in the SDM?  It takes a little while to get
used to them, but it avoids three-letter acronym soup.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4dffbc10d3f8..86aed64b9a88 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1108,6 +1108,12 @@  struct kvm_x86_ops {
 	 */
 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
 
+	/*
+	 * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
+	 * does not need to flush GPA->HPA mappings.
+	 */
+	void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
+
 	void (*run)(struct kvm_vcpu *vcpu);
 	int (*handle_exit)(struct kvm_vcpu *vcpu,
 		enum exit_fastpath_completion exit_fastpath);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a3e32d61d60c..e549811f51c6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5608,6 +5608,11 @@  static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
 	invlpga(gva, svm->vmcb->control.asid);
 }
 
+static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+	svm_flush_tlb(vcpu, true);
+}
+
 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
 }
@@ -7429,6 +7434,7 @@  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
 	.tlb_flush = svm_flush_tlb,
 	.tlb_flush_gva = svm_flush_tlb_gva,
+	.tlb_flush_guest = svm_flush_tlb_guest,
 
 	.run = svm_vcpu_run,
 	.handle_exit = handle_exit,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 349a6e054e0e..5372a93e1727 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2835,6 +2835,18 @@  static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 	 */
 }
 
+static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
+	 * or a vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit
+	 * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+	 * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
+	 * i.e. no explicit INVVPID is necessary.
+	 */
+	vpid_sync_context(to_vmx(vcpu)->vpid);
+}
+
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
 	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -7779,6 +7791,7 @@  static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.tlb_flush = vmx_flush_tlb,
 	.tlb_flush_gva = vmx_flush_tlb_gva,
+	.tlb_flush_guest = vmx_flush_tlb_guest,
 
 	.run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fbabb2f06273..72f7ca4baa6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2675,7 +2675,7 @@  static void record_steal_time(struct kvm_vcpu *vcpu)
 	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
 		st->preempted & KVM_VCPU_FLUSH_TLB);
 	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-		kvm_vcpu_flush_tlb(vcpu, false);
+		kvm_x86_ops->tlb_flush_guest(vcpu);
 
 	vcpu->arch.st.preempted = 0;