diff mbox

[v3,5/8] KVM Guest: Add paravirt kvm_flush_tlb_others

Message ID 20120731104849.16662.52362.stgit@abhimanyu.in.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania July 31, 2012, 10:48 a.m. UTC
From: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>

flush_tlb_others_ipi depends on a lot of statics in tlb.c.  Replicate
flush_tlb_others_ipi as kvm_flush_tlb_others so that it can be further
adapted for paravirtualization.

Use the vcpu state information inside kvm_flush_tlb_others to
avoid sending IPIs to pre-empted vcpus.

* Do not send IPIs to offline vcpus; set the flush_on_enter flag instead
* For online vcpus: Wait for them to clear the flag

The approach was discussed here: https://lkml.org/lkml/2012/2/20/157

v3:
* use only one state variable for vcpu-running/flush_on_enter
* use cmpxchg to update the state
* adapt to Alex Shi's TLB flush optimization

v2:
* use ACCESS_ONCE so the value is not register cached
* Separate HV and Guest code

Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>

--
Pseudo Algo:
------------
   Hypervisor
   ==========
   guest_exit()
       if (!(xchg(state, NOT_IN_GUEST) == SHOULD_FLUSH))
           tlb_flush(vcpu);

   guest_enter()
       if (!(xchg(state, IN_GUEST) == SHOULD_FLUSH))
	   tlb_flush(vcpu);

    Guest
    =====
    flushmask = cpumask;
    for_each_cpu(i, flushmask) {
        state = vs->state;
        if (!test_bit(IN_GUEST_MODE, state)) {
            if (cmpxchg(&vs->state, state,
                        state | (1 << SHOULD_FLUSH)) == SUCCESS)
                cpumask_clear_cpu(flushmask, i)
        }
    }
    if (!empty(flushmask))
        smp_call_function_many(flushmask, flush_tlb_func)

Summary:
Author:
---
 arch/x86/include/asm/tlbflush.h |   11 +++++++++++
 arch/x86/kernel/kvm.c           |    4 +++-
 arch/x86/mm/tlb.c               |   37 +++++++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 74a4433..0a343a1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -119,6 +119,13 @@  static inline void native_flush_tlb_others(const struct cpumask *cpumask,
 {
 }
 
+static inline void kvm_flush_tlb_others(const struct cpumask *cpumask,
+					struct mm_struct *mm,
+					unsigned long start,
+					unsigned long end)
+{
+}
+
 static inline void reset_lazy_tlbstate(void)
 {
 }
@@ -153,6 +160,10 @@  void native_flush_tlb_others(const struct cpumask *cpumask,
 				struct mm_struct *mm,
 				unsigned long start, unsigned long end);
 
+void kvm_flush_tlb_others(const struct cpumask *cpumask,
+			struct mm_struct *mm, unsigned long start,
+			unsigned long end);
+
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 37e6599..b538a31 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -501,8 +501,10 @@  void __init kvm_guest_init(void)
 		apic_set_eoi_write(kvm_guest_apic_eoi_write);
 
 #ifdef CONFIG_PARAVIRT_TLB_FLUSH
-	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE))
+	if (kvm_para_has_feature(KVM_FEATURE_VCPU_STATE)) {
 		has_vcpu_state = 1;
+		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
+	}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 613cd83..2399013 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
+#include <linux/kvm_para.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -119,6 +120,42 @@  static void flush_tlb_func(void *info)
 
 }
 
+#ifdef CONFIG_KVM_GUEST
+
+DECLARE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64);
+
+/*
+ * kvm_flush_tlb_others - flush_tlb_others variant that uses the per-cpu
+ * vcpu_state to avoid sending IPIs to vCPUs that are not in guest mode.
+ * @cpumask: CPUs that need a TLB flush
+ * @mm:      stored in flush_tlb_info.flush_mm for flush_tlb_func
+ * @start:   stored in flush_tlb_info.flush_start for flush_tlb_func
+ * @end:     stored in flush_tlb_info.flush_end for flush_tlb_func
+ *
+ * A private copy of @cpumask is walked.  Every vCPU whose state word does
+ * not have KVM_VCPU_STATE_IN_GUEST_MODE set gets
+ * KVM_VCPU_STATE_SHOULD_FLUSH set and is removed from the copy; whatever
+ * remains is flushed through smp_call_function_many().
+ */
+void kvm_flush_tlb_others(const struct cpumask *cpumask,
+			struct mm_struct *mm, unsigned long start,
+			unsigned long end)
+{
+	struct flush_tlb_info info;
+	struct kvm_vcpu_state *v_state;
+	u64 state;
+	int cpu;
+	cpumask_t flushmask;
+
+	cpumask_copy(&flushmask, cpumask);
+	info.flush_mm = mm;
+	info.flush_start = start;
+	info.flush_end = end;
+	/*
+	 * We have to call flush only on online vCPUs. And
+	 * queue flush_on_enter for pre-empted vCPUs
+	 */
+	for_each_cpu(cpu, &flushmask) {
+		v_state = &per_cpu(vcpu_state, cpu);
+		state = v_state->state;
+		if (!test_bit(KVM_VCPU_STATE_IN_GUEST_MODE, &state)) {
+			/*
+			 * cmpxchg() returns the previous value, so the flag
+			 * was set only if that value equals the one we read.
+			 * If the state changed under us, keep the vCPU in
+			 * the mask so that it still receives the IPI.
+			 */
+			if (cmpxchg(&v_state->state, state,
+				    state | (1ULL << KVM_VCPU_STATE_SHOULD_FLUSH)) == state)
+				cpumask_clear_cpu(cpu, &flushmask);
+		}
+	}
+
+	if (!cpumask_empty(&flushmask))
+		smp_call_function_many(&flushmask, flush_tlb_func, &info, 1);
+}
+#endif /* CONFIG_KVM_GUEST */
+
 void native_flush_tlb_others(const struct cpumask *cpumask,
 				 struct mm_struct *mm, unsigned long start,
 				 unsigned long end)