
[v2,07/31] KVM: x86: hyper-v: Create a separate ring for Direct TLB flush

Message ID 20220407155645.940890-8-vkuznets@redhat.com (mailing list archive)
State New, archived
Series KVM: x86: hyper-v: Fine-grained TLB flush + Direct TLB flush feature

Commit Message

Vitaly Kuznetsov April 7, 2022, 3:56 p.m. UTC
To handle Direct TLB flush requests from L2, KVM needs to use a
separate ring from regular Hyper-V TLB flush requests: e.g. when a
request to flush something in L2 is made, the target vCPU can
transition from L2 to L1, receive a request to flush a GVA for L1 and
then try to re-enter L2. The L2 flush request needs to be processed
before re-entering L2. Similarly, requests to flush GVAs in L1 must
wait until L2 exits to L1.

No functional change, as KVM doesn't yet handle Direct TLB flush
requests from L2.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/hyperv.c           |  7 ++++---
 arch/x86/kvm/hyperv.h           | 17 ++++++++++++++---
 3 files changed, 20 insertions(+), 7 deletions(-)

Comments

Sean Christopherson April 7, 2022, 5:57 p.m. UTC | #1
On Thu, Apr 07, 2022, Vitaly Kuznetsov wrote:
> To handle Direct TLB flush requests from L2, KVM needs to use a
> separate ring from regular Hyper-V TLB flush requests: e.g. when a
> request to flush something in L2 is made, the target vCPU can
> transition from L2 to L1, receive a request to flush a GVA for L1 and
> then try to re-enter L2. The L2 flush request needs to be processed
> before re-entering L2. Similarly, requests to flush GVAs in L1 must
> wait until L2 exits to L1.
> 
> No functional change, as KVM doesn't yet handle Direct TLB flush
> requests from L2.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  3 ++-
>  arch/x86/kvm/hyperv.c           |  7 ++++---
>  arch/x86/kvm/hyperv.h           | 17 ++++++++++++++---
>  3 files changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 15d798fe280d..b8d7c1422da6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -617,7 +617,8 @@ struct kvm_vcpu_hv {
>  		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
>  	} cpuid_cache;
>  
> -	struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring;

Probably feedback for a prior patch, but please be consistent in tlbflush vs
tlb_flush.  I prefer the tlb_flush variant.

> +	/* Two rings for regular Hyper-V TLB flush and Direct TLB flush */
> +	struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring[2];

Use an enum, then the magic numbers go away, e.g.

enum hv_tlb_flush_rings {
	HV_L1_TLB_FLUSH_RING,
	HV_L2_TLB_FLUSH_RING,
	HV_NR_TLB_FLUSH_RINGS,
};
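
The array declaration can then size itself off the enum, e.g.
(illustrative):

	struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring[HV_NR_TLB_FLUSH_RINGS];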

>  };
>  
>  /* Xen HVM per vcpu emulation context */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 918642bcdbd0..16cbf41b5b7b 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -956,7 +956,8 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
>  
>  	hv_vcpu->vp_index = vcpu->vcpu_idx;
>  
> -	spin_lock_init(&hv_vcpu->tlb_flush_ring.write_lock);
> +	spin_lock_init(&hv_vcpu->tlb_flush_ring[0].write_lock);
> +	spin_lock_init(&hv_vcpu->tlb_flush_ring[1].write_lock);

Or

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->tlb_flush_ring); i++)
		spin_lock_init(&hv_vcpu->tlb_flush_ring[i].write_lock);

or replace ARRAY_SIZE() with HV_NR_TLB_FLUSH_RINGS.

>  
>  	return 0;
>  }
> @@ -1860,7 +1861,7 @@ static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu, bool flush_all,
>  	if (!hv_vcpu)
>  		return;
>  
> -	tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
> +	tlb_flush_ring = &hv_vcpu->tlb_flush_ring[0];

The [0] is gross, and it only gets worse in future patches that take @direct,
though this is slightly less gross:

	/* Here's a comment explaining why this is hardcoded to L1's ring. */
	tlb_flush_ring = &hv_vcpu->tlb_flush_ring[HV_L1_TLB_FLUSH_RING];

More thoughts in the patch that adds @direct.

>  	spin_lock_irqsave(&tlb_flush_ring->write_lock, flags);
>  
> @@ -1920,7 +1921,7 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
>  		return;
>  	}
>  
> -	tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
> +	tlb_flush_ring = kvm_hv_get_tlb_flush_ring(vcpu);
>  	read_idx = READ_ONCE(tlb_flush_ring->read_idx);
>  	write_idx = READ_ONCE(tlb_flush_ring->write_idx);
>  
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 6847caeaaf84..448877b478ef 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -22,6 +22,7 @@
>  #define __ARCH_X86_KVM_HYPERV_H__
>  
>  #include <linux/kvm_host.h>
> +#include "x86.h"
>  
>  /*
>   * The #defines related to the synthetic debugger are required by KDNet, but
> @@ -147,15 +148,25 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
>  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
>  		     struct kvm_cpuid_entry2 __user *entries);
>  
> +static inline struct kvm_vcpu_hv_tlbflush_ring *kvm_hv_get_tlb_flush_ring(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +
> +	if (!is_guest_mode(vcpu))
> +		return &hv_vcpu->tlb_flush_ring[0];

Maybe this?

	int i = !is_guest_mode(vcpu) ? HV_L1_TLB_FLUSH_RING :
				       HV_L2_TLB_FLUSH_RING;

	return &hv_vcpu->tlb_flush_ring[i];

Though shouldn't this be a WARN condition as of this patch?  I.e. shouldn't it be
impossible to request a flush for L2 at this point?
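
E.g. something like this (wholly untested, just to illustrate with the
enum names from above):

	/*
	 * Nothing enqueues L2 flush requests as of this patch, so getting
	 * here in guest mode would be unexpected.
	 */
	WARN_ON_ONCE(is_guest_mode(vcpu));
	return &hv_vcpu->tlb_flush_ring[HV_L1_TLB_FLUSH_RING];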

> +
> +	return &hv_vcpu->tlb_flush_ring[1];
> +}
>  
>  static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +	struct kvm_vcpu_hv_tlbflush_ring *tlb_flush_ring;
>  
> -	if (!hv_vcpu)
> +	if (!to_hv_vcpu(vcpu))
>  		return;
>  
> -	hv_vcpu->tlb_flush_ring.read_idx = hv_vcpu->tlb_flush_ring.write_idx;
> +	tlb_flush_ring = kvm_hv_get_tlb_flush_ring(vcpu);
> +	tlb_flush_ring->read_idx = tlb_flush_ring->write_idx;
>  }
>  void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
>  
> -- 
> 2.35.1
>

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 15d798fe280d..b8d7c1422da6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -617,7 +617,8 @@  struct kvm_vcpu_hv {
 		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
 	} cpuid_cache;
 
-	struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring;
+	/* Two rings for regular Hyper-V TLB flush and Direct TLB flush */
+	struct kvm_vcpu_hv_tlbflush_ring tlb_flush_ring[2];
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 918642bcdbd0..16cbf41b5b7b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -956,7 +956,8 @@  static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 
 	hv_vcpu->vp_index = vcpu->vcpu_idx;
 
-	spin_lock_init(&hv_vcpu->tlb_flush_ring.write_lock);
+	spin_lock_init(&hv_vcpu->tlb_flush_ring[0].write_lock);
+	spin_lock_init(&hv_vcpu->tlb_flush_ring[1].write_lock);
 
 	return 0;
 }
@@ -1860,7 +1861,7 @@  static void hv_tlb_flush_ring_enqueue(struct kvm_vcpu *vcpu, bool flush_all,
 	if (!hv_vcpu)
 		return;
 
-	tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
+	tlb_flush_ring = &hv_vcpu->tlb_flush_ring[0];
 
 	spin_lock_irqsave(&tlb_flush_ring->write_lock, flags);
 
@@ -1920,7 +1921,7 @@  void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 		return;
 	}
 
-	tlb_flush_ring = &hv_vcpu->tlb_flush_ring;
+	tlb_flush_ring = kvm_hv_get_tlb_flush_ring(vcpu);
 	read_idx = READ_ONCE(tlb_flush_ring->read_idx);
 	write_idx = READ_ONCE(tlb_flush_ring->write_idx);
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 6847caeaaf84..448877b478ef 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -22,6 +22,7 @@ 
 #define __ARCH_X86_KVM_HYPERV_H__
 
 #include <linux/kvm_host.h>
+#include "x86.h"
 
 /*
  * The #defines related to the synthetic debugger are required by KDNet, but
@@ -147,15 +148,25 @@  int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		     struct kvm_cpuid_entry2 __user *entries);
 
+static inline struct kvm_vcpu_hv_tlbflush_ring *kvm_hv_get_tlb_flush_ring(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!is_guest_mode(vcpu))
+		return &hv_vcpu->tlb_flush_ring[0];
+
+	return &hv_vcpu->tlb_flush_ring[1];
+}
 
 static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	struct kvm_vcpu_hv_tlbflush_ring *tlb_flush_ring;
 
-	if (!hv_vcpu)
+	if (!to_hv_vcpu(vcpu))
 		return;
 
-	hv_vcpu->tlb_flush_ring.read_idx = hv_vcpu->tlb_flush_ring.write_idx;
+	tlb_flush_ring = kvm_hv_get_tlb_flush_ring(vcpu);
+	tlb_flush_ring->read_idx = tlb_flush_ring->write_idx;
 }
 void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);