[v10,03/39] KVM: x86: hyper-v: Introduce TLB flush fifo

Message ID 20220921152436.3673454-4-vkuznets@redhat.com (mailing list archive)
State New, archived
Series KVM: x86: hyper-v: Fine-grained TLB flush + L2 TLB flush features

Commit Message

Vitaly Kuznetsov Sept. 21, 2022, 3:24 p.m. UTC
To allow flushing individual GVAs instead of always flushing the whole
VPID a per-vCPU structure to pass the requests is needed. Use standard
'kfifo' to queue two types of entries: individual GVA (GFN + up to 4095
following GFNs in the lower 12 bits) and 'flush all'.
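
For illustration only (the exact encoding helpers come later in the series,
and the variable names here are made up), such an entry can be thought of as:

	u64 entry = (gfn << 12) | (nr_following_gfns & 0xfff);

With this packing, a request to flush 4095 pages starting at
0xfffffffffffff000 produces the all-ones value, which is why the special
'flush all' entry is defined as (u64)-1 and over-flushing in that corner
case is acceptable.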

The size of the fifo is arbitrary set to '16'.

Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now and
kvm_hv_vcpu_flush_tlb() doesn't actually read the fifo just resets the
queue before doing full TLB flush so the functional change is very
small but the infrastructure is prepared to handle individual GVA
flush requests.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 20 ++++++++++++++
 arch/x86/kvm/hyperv.c           | 46 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/hyperv.h           | 16 ++++++++++++
 arch/x86/kvm/x86.c              |  8 +++---
 arch/x86/kvm/x86.h              |  1 +
 5 files changed, 87 insertions(+), 4 deletions(-)

Comments

Sean Christopherson Sept. 21, 2022, 4:56 p.m. UTC | #1
On Wed, Sep 21, 2022, Vitaly Kuznetsov wrote:
> To allow flushing individual GVAs instead of always flushing the whole
> VPID a per-vCPU structure to pass the requests is needed. Use standard
> 'kfifo' to queue two types of entries: individual GVA (GFN + up to 4095
> following GFNs in the lower 12 bits) and 'flush all'.
> 
> The size of the fifo is arbitrary set to '16'.

s/arbitrary/arbitrarily

> +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;
> +
> +	if (!hv_vcpu)
> +		return;
> +
> +	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
> +
> +	kfifo_in_spinlocked(&tlb_flush_fifo->entries, &flush_all_entry,
> +			    1, &tlb_flush_fifo->write_lock);

Unless I'm missing something, there's no need to disable IRQs, i.e. this can be
kfifo_in_spinlocked_noirqsave() and the later patch can use spin_lock() instead
of spin_lock_irqsave().  The only calls to hv_tlb_flush_enqueue() are from
kvm_hv_hypercall(), i.e. it's always called from process context.
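
E.g. (untested), the enqueue would simply become:

	kfifo_in_spinlocked_noirqsave(&tlb_flush_fifo->entries, &flush_all_entry,
				      1, &tlb_flush_fifo->write_lock);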
  
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 1030b1b50552..ac30091ab346 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -151,4 +151,20 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
>  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
>  		     struct kvm_cpuid_entry2 __user *entries);
>  
> +

Unnecessary newline.

> +static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu)

What about "reset" or "purge" instead of "empty"?  "empty" is often used as query,
e.g. list_empty(), it took me a second to realize this is a command.

> +{
> +	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +
> +	if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
> +		return;
> +
> +	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
> +
> +	kfifo_reset_out(&tlb_flush_fifo->entries);
> +}

Missing newline.

> +void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
> +
> +

One too many newlines.

>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 86504a8bfd9a..45c35c5467f8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3385,7 +3385,7 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
>  	static_call(kvm_x86_flush_tlb_all)(vcpu);
>  }
>  
> -static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
> +void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
>  {
>  	++vcpu->stat.tlb_flush;
>  
> @@ -3420,14 +3420,14 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
>  {
>  	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) {
>  		kvm_vcpu_flush_tlb_current(vcpu);
> -		kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
> +		kvm_hv_vcpu_empty_flush_tlb(vcpu);

It might be worth adding a comment to call out that emptying the FIFO _after_ the
TLB flush is ok, because it's impossible for the CPU to insert TLB entries for the
guest while running in the host.  At first glance, it looks like this (and the
existing similar pattern in vcpu_enter_guest()) has a race where it could miss a
TLB flush.
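
Something like this perhaps (completely untested, wording is just a
suggestion):

	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) {
		kvm_vcpu_flush_tlb_current(vcpu);

		/*
		 * Emptying the Hyper-V TLB flush FIFO _after_ the flush is
		 * safe: the CPU can't insert TLB entries for the guest while
		 * KVM is running in the host, so no request can be missed.
		 */
		kvm_hv_vcpu_empty_flush_tlb(vcpu);
	}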

Definitely not required, e.g. kvm_vcpu_flush_tlb_all() doesn't have a similar
comment.  I think it's just the existence of the FIFO that made me pause.

>  	}
>  
>  	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) {
>  		kvm_vcpu_flush_tlb_guest(vcpu);
> -		kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
> +		kvm_hv_vcpu_empty_flush_tlb(vcpu);
>  	} else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) {
> -		kvm_vcpu_flush_tlb_guest(vcpu);
> +		kvm_hv_vcpu_flush_tlb(vcpu);

Rather than expose kvm_vcpu_flush_tlb_guest() to Hyper-V, what about implementing
this in a similar way to how KVM-on-HyperV implements remote TLB flushes?  I.e.
fall back to kvm_vcpu_flush_tlb_guest() if the precise flush "fails".

I don't mind exposing kvm_vcpu_flush_tlb_guest(), but burying the calls inside
Hyper-V code makes it difficult to see the relationship between KVM_REQ_HV_TLB_FLUSH
and KVM_REQ_TLB_FLUSH_GUEST.

And as a minor bonus, that also helps document that kvm_hv_vcpu_flush_tlb() doesn't
yet support precise flushing.

E.g.

	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) {
		kvm_vcpu_flush_tlb_guest(vcpu);
	} else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) {
		/*
		 * Fall back to a "full" guest flush if Hyper-V's precise
		 * flushing fails.
		 */
		if (kvm_hv_vcpu_flush_tlb(vcpu))
			kvm_vcpu_flush_tlb_guest(vcpu);
	}


int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	if (!hv_vcpu)
		return -EINVAL;

	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;

	kfifo_reset_out(&tlb_flush_fifo->entries);

	/* Precise flushing isn't implemented yet. */
	return -EOPNOTSUPP;
}
Vitaly Kuznetsov Sept. 22, 2022, 9:42 a.m. UTC | #2
Sean Christopherson <seanjc@google.com> writes:

> On Wed, Sep 21, 2022, Vitaly Kuznetsov wrote:
>> To allow flushing individual GVAs instead of always flushing the whole
>> VPID a per-vCPU structure to pass the requests is needed. Use standard
>> 'kfifo' to queue two types of entries: individual GVA (GFN + up to 4095
>> following GFNs in the lower 12 bits) and 'flush all'.
>> 
>> The size of the fifo is arbitrary set to '16'.
>
> s/arbitrary/arbitrarily
>
>> +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
>> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>> +	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;
>> +
>> +	if (!hv_vcpu)
>> +		return;
>> +
>> +	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
>> +
>> +	kfifo_in_spinlocked(&tlb_flush_fifo->entries, &flush_all_entry,
>> +			    1, &tlb_flush_fifo->write_lock);
>
> Unless I'm missing something, there's no need to disable IRQs, i.e. this can be
> kfifo_in_spinlocked_noirqsave() and the later patch can use spin_lock() instead
> of spin_lock_irqsave().  The only calls to hv_tlb_flush_enqueue() are from
> kvm_hv_hypercall(), i.e. it's always called from process context.
>   

Yes, no IRQ/... contexts are expected; the intention was to take the
spinlock for the shortest amount of time, not to protect against a
deadlock. This probably isn't worth it and causes confusion, so I'll
remove it.

>> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
>> index 1030b1b50552..ac30091ab346 100644
>> --- a/arch/x86/kvm/hyperv.h
>> +++ b/arch/x86/kvm/hyperv.h
>> @@ -151,4 +151,20 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
>>  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
>>  		     struct kvm_cpuid_entry2 __user *entries);
>>  
>> +
>
> Unnecessary newline.
>
>> +static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu)
>
> What about "reset" or "purge" instead of "empty"?  "empty" is often used as query,
> e.g. list_empty(), it took me a second to realize this is a command.
>

'purge' sounds good to me!

>> +{
>> +	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
>> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>> +
>> +	if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
>> +		return;
>> +
>> +	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
>> +
>> +	kfifo_reset_out(&tlb_flush_fifo->entries);
>> +}
>
> Missing newline.
>
>> +void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
>> +
>> +
>
> One too many newlines.
>
>>  #endif
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 86504a8bfd9a..45c35c5467f8 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -3385,7 +3385,7 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
>>  	static_call(kvm_x86_flush_tlb_all)(vcpu);
>>  }
>>  
>> -static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
>> +void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
>>  {
>>  	++vcpu->stat.tlb_flush;
>>  
>> @@ -3420,14 +3420,14 @@ void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
>>  {
>>  	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) {
>>  		kvm_vcpu_flush_tlb_current(vcpu);
>> -		kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
>> +		kvm_hv_vcpu_empty_flush_tlb(vcpu);
>
> It might be worth adding a comment to call out that emptying the FIFO _after_ the
> TLB flush is ok, because it's impossible for the CPU to insert TLB entries for the
> guest while running in the host.  At first glance, it looks like this (and the
> existing similar pattern in vcpu_enter_guest()) has a race where it could miss a
> TLB flush.
>
> Definitely not required, e.g. kvm_vcpu_flush_tlb_all() doesn't have a similar
> comment.  I think it's just the existence of the FIFO that made me pause.
>

Np, will add something for future generations of readers)

>>  	}
>>  
>>  	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) {
>>  		kvm_vcpu_flush_tlb_guest(vcpu);
>> -		kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
>> +		kvm_hv_vcpu_empty_flush_tlb(vcpu);
>>  	} else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) {
>> -		kvm_vcpu_flush_tlb_guest(vcpu);
>> +		kvm_hv_vcpu_flush_tlb(vcpu);
>
> Rather than expose kvm_vcpu_flush_tlb_guest() to Hyper-V, what about implementing
> this in a similar way to how KVM-on-HyperV implements remote TLB flushes?  I.e.
> fall back to kvm_vcpu_flush_tlb_guest() if the precise flush "fails".
>
> I don't mind exposing kvm_vcpu_flush_tlb_guest(), but burying the calls inside
> Hyper-V code makes it difficult to see the relationship between KVM_REQ_HV_TLB_FLUSH
> and KVM_REQ_TLB_FLUSH_GUEST.
>
> And as a minor bonus, that also helps document that kvm_hv_vcpu_flush_tlb() doesn't
> yet support precise flushing.
>
> E.g.
>
> 	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) {
> 		kvm_vcpu_flush_tlb_guest(vcpu);
> 	} else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) {
> 		/*
> 		 * Fall back to a "full" guest flush if Hyper-V's precise
> 		 * flushing fails.
> 		 */
> 		if (kvm_hv_vcpu_flush_tlb(vcpu))
> 			kvm_vcpu_flush_tlb_guest(vcpu);
> 	}
>
>
> int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
> {
> 	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
> 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>
> 	if (!hv_vcpu)
> 		return -EINVAL;
>
> 	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
>
> 	kfifo_reset_out(&tlb_flush_fifo->entries);
>
> 	/* Precise flushing isn't implemented yet. */
> 	return -EOPNOTSUPP;
> }
>

Oh, I see, it certainly can be done this way, even if just to improve
readability. Will change.

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 45c390c804f0..c97161436a9d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -25,6 +25,7 @@ 
 #include <linux/clocksource.h>
 #include <linux/irqbypass.h>
 #include <linux/hyperv.h>
+#include <linux/kfifo.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -599,6 +600,23 @@  struct kvm_vcpu_hv_synic {
 	bool dont_zero_synic_pages;
 };
 
+/* The maximum number of entries on the TLB flush fifo. */
+#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
+/*
+ * Note: the following 'magic' entry is made up by KVM to avoid putting
+ * anything besides GVA on the TLB flush fifo. It is theoretically possible
+ * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
+ * which will look identical. KVM's action to 'flush everything' instead of
+ * flushing these particular addresses is, however, fully legitimate as
+ * flushing more than requested is always OK.
+ */
+#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
+
+struct kvm_vcpu_hv_tlb_flush_fifo {
+	spinlock_t write_lock;
+	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
+};
+
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
 	struct kvm_vcpu *vcpu;
@@ -620,6 +638,8 @@  struct kvm_vcpu_hv {
 		u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
 		u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
 	} cpuid_cache;
+
+	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo;
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3c0f639f6a05..b127b6bb84dd 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@ 
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
+#include <linux/spinlock.h>
 #include <linux/eventfd.h>
 
 #include <asm/apicdef.h>
@@ -954,6 +955,9 @@  int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 
 	hv_vcpu->vp_index = vcpu->vcpu_idx;
 
+	INIT_KFIFO(hv_vcpu->tlb_flush_fifo.entries);
+	spin_lock_init(&hv_vcpu->tlb_flush_fifo.write_lock);
+
 	return 0;
 }
 
@@ -1783,6 +1787,36 @@  static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
 			      var_cnt * sizeof(*sparse_banks));
 }
 
+static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;
+
+	if (!hv_vcpu)
+		return;
+
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+
+	kfifo_in_spinlocked(&tlb_flush_fifo->entries, &flush_all_entry,
+			    1, &tlb_flush_fifo->write_lock);
+}
+
+void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	kvm_vcpu_flush_tlb_guest(vcpu);
+
+	if (!hv_vcpu)
+		return;
+
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+
+	kfifo_reset_out(&tlb_flush_fifo->entries);
+}
+
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1791,6 +1825,8 @@  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	u64 valid_bank_mask;
 	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+	struct kvm_vcpu *v;
+	unsigned long i;
 	bool all_cpus;
 
 	/*
@@ -1870,10 +1906,20 @@  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
 	if (all_cpus) {
+		kvm_for_each_vcpu(i, v, kvm)
+			hv_tlb_flush_enqueue(v);
+
 		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
 	} else {
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
+		for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
+			v = kvm_get_vcpu(kvm, i);
+			if (!v)
+				continue;
+			hv_tlb_flush_enqueue(v);
+		}
+
 		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
 	}
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 1030b1b50552..ac30091ab346 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -151,4 +151,20 @@  int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		     struct kvm_cpuid_entry2 __user *entries);
 
+
+static inline void kvm_hv_vcpu_empty_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+		return;
+
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+
+	kfifo_reset_out(&tlb_flush_fifo->entries);
+}
+void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
+
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 86504a8bfd9a..45c35c5467f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3385,7 +3385,7 @@  static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
 	static_call(kvm_x86_flush_tlb_all)(vcpu);
 }
 
-static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
+void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.tlb_flush;
 
@@ -3420,14 +3420,14 @@  void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
 {
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) {
 		kvm_vcpu_flush_tlb_current(vcpu);
-		kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+		kvm_hv_vcpu_empty_flush_tlb(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu)) {
 		kvm_vcpu_flush_tlb_guest(vcpu);
-		kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+		kvm_hv_vcpu_empty_flush_tlb(vcpu);
 	} else if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) {
-		kvm_vcpu_flush_tlb_guest(vcpu);
+		kvm_hv_vcpu_flush_tlb(vcpu);
 	}
 }
 EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a784ff90740b..1ea28a7bdf2f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -79,6 +79,7 @@  static inline unsigned int __shrink_ple_window(unsigned int val,
 
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
 
+void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu);
 void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
 int kvm_check_nested_events(struct kvm_vcpu *vcpu);