From patchwork Tue Jul 6 16:24:57 2010
X-Patchwork-Id: 110448
From: Gleb Natapov
To: kvm@vger.kernel.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, avi@redhat.com, mingo@elte.hu, a.p.zijlstra@chello.nl, tglx@linutronix.de, hpa@zytor.com, riel@redhat.com, cl@linux-foundation.org, mtosatti@redhat.com
Subject: [PATCH v4 09/12] Retry fault before vmentry
Date: Tue, 6 Jul 2010 19:24:57 +0300
Message-Id: <1278433500-29884-10-git-send-email-gleb@redhat.com>
In-Reply-To: <1278433500-29884-1-git-send-email-gleb@redhat.com>
References: <1278433500-29884-1-git-send-email-gleb@redhat.com>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db514ea..45e6c12 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,7 +236,8 @@ struct kvm_pio_request {
  */
 struct kvm_mmu {
 	void (*new_cr3)(struct kvm_vcpu *vcpu);
-	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
+	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, bool sync);
+	int (*page_fault_other_cr3)(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gva, u32 err);
 	void (*free)(struct kvm_vcpu *vcpu);
 	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
 			    u32 *error);
@@ -534,6 +535,8 @@ struct kvm_x86_ops {
 
 struct kvm_arch_async_pf {
 	u32 token;
+	gpa_t cr3;
+	u32 error_code;
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -777,6 +780,8 @@ void kvm_arch_inject_async_page_not_present(struct kvm_vcpu *vcpu,
 					    struct kvm_async_pf *work);
 void kvm_arch_inject_async_page_present(struct kvm_vcpu *vcpu,
 					struct kvm_async_pf *work);
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work);
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a49565b..95a0a8b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2246,7 +2246,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
 }
 
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-				u32 error_code)
+				u32 error_code, bool sync)
 {
 	gfn_t gfn;
 	int r;
@@ -2265,10 +2265,13 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 			     error_code & PFERR_WRITE_MASK, gfn);
 }
 
-int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gva,
+			    gfn_t gfn, u32 error_code)
 {
 	struct kvm_arch_async_pf arch;
 	arch.token = (vcpu->arch.async_pf_id++ << 12) | vcpu->vcpu_id;
+	arch.cr3 = cr3;
+	arch.error_code = error_code;
 
 	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
 }
@@ -2280,8 +2283,8 @@ static bool can_do_async_pf(struct kvm_vcpu *vcpu)
 	return !!kvm_x86_ops->get_cpl(vcpu);
 }
 
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
-			  u32 error_code)
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+			  bool sync)
 {
 	pfn_t pfn;
 	int r;
@@ -2304,7 +2307,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (can_do_async_pf(vcpu)) {
+	if (!sync && can_do_async_pf(vcpu)) {
 		pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
 		trace_kvm_try_async_get_page(async, pfn);
 	} else {
@@ -2314,7 +2317,8 @@ do_sync:
 	}
 
 	if (async) {
-		if (!kvm_arch_setup_async_pf(vcpu, gpa, gfn))
+		if (!kvm_arch_setup_async_pf(vcpu, vcpu->arch.cr3, gpa, gfn,
+					     error_code))
 			goto do_sync;
 		return 0;
 	}
@@ -2338,6 +2342,12 @@ out_unlock:
 	return 0;
 }
 
+static int tdp_page_fault_sync(struct kvm_vcpu *vcpu, gpa_t cr3, gva_t gpa,
+			       u32 error_code)
+{
+	return tdp_page_fault(vcpu, gpa, error_code, true);
+}
+
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
 	mmu_free_roots(vcpu);
@@ -2468,6 +2478,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging64_page_fault;
+	context->page_fault_other_cr3 = paging64_page_fault_other_cr3;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->prefetch_page = paging64_prefetch_page;
 	context->sync_page = paging64_sync_page;
@@ -2492,6 +2503,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 	reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
+	context->page_fault_other_cr3 = paging32_page_fault_other_cr3;
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->free = paging_free;
 	context->prefetch_page = paging32_prefetch_page;
@@ -2515,6 +2527,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = tdp_page_fault;
+	context->page_fault_other_cr3 = tdp_page_fault_sync;
 	context->free = nonpaging_free;
 	context->prefetch_page = nonpaging_prefetch_page;
 	context->sync_page = nonpaging_sync_page;
@@ -2902,7 +2915,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	int r;
 	enum emulation_result er;
 
-	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
 	if (r < 0)
 		goto out;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 26d6b74..cbc9729 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -410,8 +410,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
  * Returns: 1 if we need to emulate the instruction, 0 otherwise, or
  *	    a negative value on error.
  */
-static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
-			     u32 error_code)
+static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
+			     bool sync)
 {
 	int write_fault = error_code & PFERR_WRITE_MASK;
 	int user_fault = error_code & PFERR_USER_MASK;
@@ -456,7 +456,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
-	if (can_do_async_pf(vcpu)) {
+	if (!sync && can_do_async_pf(vcpu)) {
 		pfn = gfn_to_pfn_async(vcpu->kvm, walker.gfn, &async);
 		trace_kvm_try_async_get_page(async, pfn);
 	} else {
@@ -466,7 +466,8 @@ do_sync:
 	}
 
 	if (async) {
-		if (!kvm_arch_setup_async_pf(vcpu, addr, walker.gfn))
+		if (!kvm_arch_setup_async_pf(vcpu, vcpu->arch.cr3, addr,
+					     walker.gfn, error_code))
 			goto do_sync;
 		return 0;
 	}
@@ -500,6 +501,34 @@ out_unlock:
 	return 0;
 }
 
+static int FNAME(page_fault_other_cr3)(struct kvm_vcpu *vcpu, gpa_t cr3,
+				       gva_t addr, u32 error_code)
+{
+	int r = 0;
+	gpa_t curr_cr3 = vcpu->arch.cr3;
+
+	if (curr_cr3 != cr3) {
+		/*
+		 * We do the page fault on behalf of a process that is sleeping
+		 * because of async PF. The PV guest takes a reference to the mm
+		 * that cr3 belongs to, so it has to be valid here.
+		 */
+		kvm_set_cr3(vcpu, cr3);
+		if (kvm_mmu_reload(vcpu))
+			goto switch_cr3;
+	}
+
+	r = FNAME(page_fault)(vcpu, addr, error_code, true);
+
+switch_cr3:
+	if (curr_cr3 != vcpu->arch.cr3) {
+		kvm_set_cr3(vcpu, curr_cr3);
+		kvm_mmu_reload(vcpu);
+	}
+
+	return r;
+}
+
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b7542f..ae7164e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5716,6 +5716,15 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 }
 EXPORT_SYMBOL_GPL(kvm_set_rflags);
 
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work)
+{
+	if (!vcpu->arch.mmu.page_fault_other_cr3 || is_error_page(work->page))
+		return;
+	vcpu->arch.mmu.page_fault_other_cr3(vcpu, work->arch.cr3, work->gva,
+					    work->arch.error_code);
+}
+
 static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
 {
 	if (unlikely(vcpu->arch.apf_memslot_ver !=
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0656054..409b9b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1339,6 +1339,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		spin_lock(&vcpu->async_pf_lock);
 		list_del(&work->link);
 		spin_unlock(&vcpu->async_pf_lock);
+		kvm_arch_async_page_ready(vcpu, work);
 		put_page(work->page);
 		async_pf_work_free(work);
 		list_del(&work->queue);
@@ -1357,6 +1358,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		list_del(&work->queue);
 		vcpu->async_pf_queued--;
 
+		kvm_arch_async_page_ready(vcpu, work);
 		kvm_arch_inject_async_page_present(vcpu, work);
 		put_page(work->page);
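
[Editor's note, not part of the patch] A minimal standalone sketch of the retry flow this change implements, using simplified stand-in types (async_pf_record, vcpu_state) instead of the real KVM structures: kvm_arch_setup_async_pf records the faulting cr3 and error code, and kvm_arch_async_page_ready replays the fault against that cr3 once the page is available, before the vcpu re-enters the guest.

/* Sketch only -- stand-in types and plain functions, not the real KVM code. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gpa_t;
typedef uint64_t gva_t;

struct async_pf_record {	/* models kvm_arch_async_pf plus the gva */
	gpa_t cr3;
	uint32_t error_code;
	gva_t gva;
};

struct vcpu_state {		/* only the piece of vcpu state that matters here */
	gpa_t cr3;
};

/* Models mmu.page_fault_other_cr3: switch to the recorded cr3 if needed,
 * handle the fault synchronously, then switch back before vmentry. */
static int page_fault_other_cr3(struct vcpu_state *vcpu, gpa_t cr3,
				gva_t gva, uint32_t error_code)
{
	gpa_t curr_cr3 = vcpu->cr3;

	if (curr_cr3 != cr3)
		vcpu->cr3 = cr3;	/* stands in for kvm_set_cr3 + kvm_mmu_reload */

	printf("replay fault gva=%#" PRIx64 " err=%#x under cr3=%#" PRIx64 "\n",
	       gva, error_code, vcpu->cr3);	/* FNAME(page_fault)(..., sync=true) */

	if (curr_cr3 != vcpu->cr3)
		vcpu->cr3 = curr_cr3;	/* restore the guest's current cr3 */
	return 0;
}

/* Models kvm_arch_async_page_ready: called from async-PF completion,
 * before the "page present" event is injected into the guest. */
static void async_page_ready(struct vcpu_state *vcpu,
			     const struct async_pf_record *work)
{
	page_fault_other_cr3(vcpu, work->cr3, work->gva, work->error_code);
}

int main(void)
{
	struct vcpu_state vcpu = { .cr3 = 0x1000 };
	struct async_pf_record work = {
		.cr3 = 0x2000, .error_code = 0x2, .gva = 0x7f0000001000,
	};

	async_page_ready(&vcpu, &work);	/* swapped-in page is ready: map it now */
	return 0;
}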