diff mbox series

[v2,16/31] KVM: nVMX: hyper-v: Direct TLB flush

Message ID 20220407155645.940890-17-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: hyper-v: Fine-grained TLB flush + Direct TLB flush feature | expand

Commit Message

Vitaly Kuznetsov April 7, 2022, 3:56 p.m. UTC
Enable Direct TLB flush feature on nVMX when:
- Enlightened VMCS is in use.
- Direct TLB flush flag is enabled in eVMCS.
- Direct TLB flush is enabled in partition assist page.

Perform synthetic vmexit to L1 after processing TLB flush call upon
request (HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/vmx/evmcs.c  | 20 ++++++++++++++++++++
 arch/x86/kvm/vmx/evmcs.h  |  3 +++
 arch/x86/kvm/vmx/nested.c | 16 ++++++++++++++++
 3 files changed, 39 insertions(+)

Comments

Sean Christopherson April 7, 2022, 6:47 p.m. UTC | #1
On Thu, Apr 07, 2022, Vitaly Kuznetsov wrote:
> Enable Direct TLB flush feature on nVMX when:
> - Enlightened VMCS is in use.
> - Direct TLB flush flag is enabled in eVMCS.
> - Direct TLB flush is enabled in partition assist page.

Yeah, KVM definitely needs a different name for "Direct TLB flush".  I don't have
any good ideas offhand, but honestly anything is better than "Direct".

> Perform synthetic vmexit to L1 after processing TLB flush call upon
> request (HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH).
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---

...

> diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
> index 8862692a4c5d..ab0949c22d2d 100644
> --- a/arch/x86/kvm/vmx/evmcs.h
> +++ b/arch/x86/kvm/vmx/evmcs.h
> @@ -65,6 +65,8 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
>  #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
>  #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
>  
> +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031

LOL, I guess I have to appreciate the cleverness.  Bit 28 is cleared for all
exits except when using an SMI transfer monitor, and then it's set only if MTF
is pending.

  The remainder of the field (bits 31:28 and bits 26:16) is cleared to 0 (certain
  SMM VM exits may set some of these bits; see Section 31.15.2.3).

  If the SMM VM exit occurred in VMX non-root operation and an MTF VM exit was
  pending, bit 28 of the exit-reason field is set; otherwise, it is cleared.

So despite all appearances, Microsoft didn't actually steal a bit from Intel,
they're just abusing a bit that (a) will never be set so long as the VMM doesn't
use parallel SMM and (b) architecturally can't be set in conjunction with many
exit reasons (everything that's _not_ some form of SMI).

Can you add a comment to document this?

/*
 * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by
 * pairing it with architecturally impossible exit reasons.  Bit 28 is set only
 * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit
 * is pending.  I.e. it will never be set by hardware for non-SMI exits (there
 * are only three), nor will it ever be set unless the VMM is an STM.
 */

>  struct evmcs_field {
>  	u16 offset;
>  	u16 clean_field;
> @@ -244,6 +246,7 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
>  			uint16_t *vmcs_version);
>  void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);
>  int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
> +bool nested_evmcs_direct_flush_enabled(struct kvm_vcpu *vcpu);
Vitaly Kuznetsov April 11, 2022, 11:19 a.m. UTC | #2
Sean Christopherson <seanjc@google.com> writes:

> On Thu, Apr 07, 2022, Vitaly Kuznetsov wrote:
>> Enable Direct TLB flush feature on nVMX when:
>> - Enlightened VMCS is in use.
>> - Direct TLB flush flag is enabled in eVMCS.
>> - Direct TLB flush is enabled in partition assist page.
>
> Yeah, KVM definitely needs a different name for "Direct TLB flush".  I don't have
> any good ideas offhand, but honestly anything is better than "Direct".
>

I think we can get away without a name inside KVM, we'll be doing either
'L1 TLB flush' or 'L2 TLB flush'. In QEMU we can still use 'Direct' I
believe as it matches TLFS and doesn't intersect with KVM's MMU.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 1705c4973636..cdf7ec5cb64c 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -6,6 +6,7 @@ 
 #include "../hyperv.h"
 #include "../cpuid.h"
 #include "evmcs.h"
+#include "nested.h"
 #include "vmcs.h"
 #include "vmx.h"
 #include "trace.h"
@@ -438,6 +439,25 @@  int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+bool nested_evmcs_direct_flush_enabled(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+	struct hv_vp_assist_page assist_page;
+
+	if (!evmcs)
+		return false;
+
+	if (!evmcs->hv_enlightenments_control.nested_flush_hypercall)
+		return false;
+
+	if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+		return false;
+
+	return assist_page.nested_control.features.directhypercall;
+}
+
 void vmx_post_hv_direct_flush(struct kvm_vcpu *vcpu)
 {
+	nested_vmx_vmexit(vcpu, HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH, 0, 0);
 }
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 8862692a4c5d..ab0949c22d2d 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -65,6 +65,8 @@  DECLARE_STATIC_KEY_FALSE(enable_evmcs);
 #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
 #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
 
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
+
 struct evmcs_field {
 	u16 offset;
 	u16 clean_field;
@@ -244,6 +246,7 @@  int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version);
 void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);
 int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
+bool nested_evmcs_direct_flush_enabled(struct kvm_vcpu *vcpu);
 void vmx_post_hv_direct_flush(struct kvm_vcpu *vcpu);
 
 #endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7dd4104cfdf4..d53d0cfe1df1 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1171,6 +1171,17 @@  static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	/*
+	 * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VPID or
+	 * L2's VPID upon request from the guest. Make sure we check for
+	 * pending entries for the case when the request got misplaced (e.g.
+	 * a transition from L2->L1 happened while processing Direct TLB flush
+	 * request or vice versa). kvm_hv_vcpu_flush_tlb() will not flush
+	 * anything if there are no requests in the corresponding buffer.
+	 */
+	if (to_hv_vcpu(vcpu))
+		kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+
 	/*
 	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
 	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
@@ -5975,6 +5986,11 @@  static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
 		 * Handle L2's bus locks in L0 directly.
 		 */
 		return true;
+	case EXIT_REASON_VMCALL:
+		/* Hyper-V Direct TLB flush hypercall is handled by L0 */
+		return kvm_hv_direct_tlb_flush_exposed(vcpu) &&
+			nested_evmcs_direct_flush_enabled(vcpu) &&
+			kvm_hv_is_tlb_flush_hcall(vcpu);
 	default:
 		break;
 	}