From patchwork Mon Mar 16 11:07:50 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Avi Kivity X-Patchwork-Id: 12389 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n2GB8BoD022355 for ; Mon, 16 Mar 2009 11:08:11 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756752AbZCPLIK (ORCPT ); Mon, 16 Mar 2009 07:08:10 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1756141AbZCPLIJ (ORCPT ); Mon, 16 Mar 2009 07:08:09 -0400 Received: from mx2.redhat.com ([66.187.237.31]:46416 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755260AbZCPLII (ORCPT ); Mon, 16 Mar 2009 07:08:08 -0400 Received: from int-mx2.corp.redhat.com (int-mx2.corp.redhat.com [172.16.27.26]) by mx2.redhat.com (8.13.8/8.13.8) with ESMTP id n2GB87Rv021284 for ; Mon, 16 Mar 2009 07:08:07 -0400 Received: from ns3.rdu.redhat.com (ns3.rdu.redhat.com [10.11.255.199]) by int-mx2.corp.redhat.com (8.13.1/8.13.1) with ESMTP id n2GB86eS012348; Mon, 16 Mar 2009 07:08:07 -0400 Received: from cleopatra.tlv.redhat.com (cleopatra.tlv.redhat.com [10.35.255.11]) by ns3.rdu.redhat.com (8.13.8/8.13.8) with ESMTP id n2GB85ZC009366; Mon, 16 Mar 2009 07:08:06 -0400 Received: from localhost.localdomain (cleopatra.tlv.redhat.com [10.35.255.11]) by cleopatra.tlv.redhat.com (Postfix) with ESMTP id 2760EA0117; Mon, 16 Mar 2009 13:07:51 +0200 (IST) From: Avi Kivity To: Andrea Arcangeli , Marcelo Tosatti Cc: kvm@vger.kernel.org Subject: [PATCH] KVM: Defer remote tlb flushes on invlpg (v2) Date: Mon, 16 Mar 2009 13:07:50 +0200 Message-Id: <1237201670-5572-1-git-send-email-avi@redhat.com> X-Scanned-By: MIMEDefang 2.58 on 172.16.27.26 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org KVM flushes tlbs on remote cpus for two purposes: to protect guest pages that it needs to 
collect information about, and to prevent stale tlb entries from pointing to pages that no longer belong to the guest. We can defer the latter flushes to the point when we actually free the pages, which is during an mmu notifier invocation. To this end, we add a new state remote_tlbs_dirty which marks whether the guest tlb might be inconsistent with the shadow page tables. Whenever we do a conditional flush of remote tlbs, we check this state, and if the remote tlbs are dirty we flush them to ensure no inconsistency. [v2: add helper kvm_flush_remote_tlbs_cond() to remove the need for callers to care about the new logic] Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/mmu.c | 14 ++++++++------ arch/x86/kvm/paging_tmpl.h | 6 ++++-- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 18 ++++++++++++++++-- 5 files changed, 30 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4627627..eb1ab29 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -383,7 +383,6 @@ struct kvm_mem_alias { struct kvm_arch{ int naliases; struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; - unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a36f7f..18bcee5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1093,13 +1093,15 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { + bool need_flush; + if (sp->role.glevels != vcpu->arch.mmu.root_level) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } - if (rmap_write_protect(vcpu->kvm, sp->gfn)) - kvm_flush_remote_tlbs(vcpu->kvm); + need_flush = rmap_write_protect(vcpu->kvm, sp->gfn); + kvm_flush_remote_tlbs_cond(vcpu->kvm, need_flush); kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) 
{ kvm_mmu_zap_page(vcpu->kvm, sp); @@ -1184,8 +1186,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, for_each_sp(pages, sp, parents, i) protected |= rmap_write_protect(vcpu->kvm, sp->gfn); - if (protected) - kvm_flush_remote_tlbs(vcpu->kvm); + kvm_flush_remote_tlbs_cond(vcpu->kvm, protected); for_each_sp(pages, sp, parents, i) { kvm_sync_page(vcpu, sp); @@ -1210,6 +1211,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, struct hlist_head *bucket; struct kvm_mmu_page *sp; struct hlist_node *node, *tmp; + bool need_flush; role = vcpu->arch.mmu.base_role; role.level = level; @@ -1251,8 +1253,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp->global = role.cr4_pge; hlist_add_head(&sp->hash_link, bucket); if (!direct) { - if (rmap_write_protect(vcpu->kvm, gfn)) - kvm_flush_remote_tlbs(vcpu->kvm); + need_flush = rmap_write_protect(vcpu->kvm, gfn); + kvm_flush_remote_tlbs_cond(vcpu->kvm, need_flush); account_shadowed(vcpu->kvm, gfn); } if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 855eb71..18abdf9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -475,8 +475,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) break; } - if (need_flush) - kvm_flush_remote_tlbs(vcpu->kvm); + if (need_flush) { + vcpu->kvm->remote_tlbs_dirty = true; + kvm_x86_ops->tlb_flush(vcpu); + } spin_unlock(&vcpu->kvm->mmu_lock); if (pte_gpa == -1) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 11eb702..b779c57 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -125,6 +125,7 @@ struct kvm_kernel_irq_routing_entry { struct kvm { struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; + bool remote_tlbs_dirty; struct rw_semaphore slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ int nmemslots; @@ -235,6 +236,7 @@ void 
kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_cond(struct kvm *kvm, bool cond); void kvm_reload_remote_mmus(struct kvm *kvm); long kvm_arch_dev_ioctl(struct file *filp, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 68b217e..2ee6a6d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -758,10 +758,22 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) void kvm_flush_remote_tlbs(struct kvm *kvm) { + kvm->remote_tlbs_dirty = false; + wmb(); if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.remote_tlb_flush; } +void kvm_flush_remote_tlbs_cond(struct kvm *kvm, bool cond) +{ + if (!cond) { + rmb(); + cond = kvm->remote_tlbs_dirty; + } + if (cond) + kvm_flush_remote_tlbs(kvm); +} + void kvm_reload_remote_mmus(struct kvm *kvm) { make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); @@ -840,8 +852,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, need_tlb_flush = kvm_unmap_hva(kvm, address); spin_unlock(&kvm->mmu_lock); + rmb(); /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->remote_tlbs_dirty) kvm_flush_remote_tlbs(kvm); } @@ -865,8 +878,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, need_tlb_flush |= kvm_unmap_hva(kvm, start); spin_unlock(&kvm->mmu_lock); + rmb(); /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->remote_tlbs_dirty) kvm_flush_remote_tlbs(kvm); }