From patchwork Fri Jan 4 13:56:59 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Guangrong X-Patchwork-Id: 1932981 Return-Path: X-Original-To: patchwork-kvm@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork1.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork1.kernel.org (Postfix) with ESMTP id A7FA23FDDA for ; Fri, 4 Jan 2013 13:57:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754806Ab3ADN5L (ORCPT ); Fri, 4 Jan 2013 08:57:11 -0500 Received: from e28smtp07.in.ibm.com ([122.248.162.7]:36557 "EHLO e28smtp07.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754724Ab3ADN5J (ORCPT ); Fri, 4 Jan 2013 08:57:09 -0500 Received: from /spool/local by e28smtp07.in.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted for from ; Fri, 4 Jan 2013 19:25:54 +0530 Received: from d28dlp03.in.ibm.com (9.184.220.128) by e28smtp07.in.ibm.com (192.168.1.137) with IBM ESMTP SMTP Gateway: Authorized Use Only! Violators will be prosecuted; Fri, 4 Jan 2013 19:25:51 +0530 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by d28dlp03.in.ibm.com (Postfix) with ESMTP id DBE551258052; Fri, 4 Jan 2013 19:27:11 +0530 (IST) Received: from d28av04.in.ibm.com (d28av04.in.ibm.com [9.184.220.66]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id r04Dv0LR46137390; Fri, 4 Jan 2013 19:27:01 +0530 Received: from d28av04.in.ibm.com (loopback [127.0.0.1]) by d28av04.in.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id r04Dv1QK008955; Sat, 5 Jan 2013 00:57:02 +1100 Received: from localhost.localdomain ([9.123.236.187]) by d28av04.in.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id r04Dv0w5008886; Sat, 5 Jan 2013 00:57:00 +1100 Message-ID: <50E6DFAB.6050008@linux.vnet.ibm.com> Date: Fri, 04 Jan 2013 21:56:59 +0800 From: Xiao Guangrong User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:15.0) Gecko/20120911 Thunderbird/15.0.1 MIME-Version: 1.0 To: Xiao Guangrong CC: Marcelo Tosatti , Gleb Natapov , LKML , KVM Subject: [PATCH v4 5/5] KVM: x86: improve reexecute_instruction References: <50E6DEDC.7040800@linux.vnet.ibm.com> In-Reply-To: <50E6DEDC.7040800@linux.vnet.ibm.com> X-Content-Scanned: Fidelis XPS MAILER x-cbid: 13010413-8878-0000-0000-0000056555CE Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org The current reexecute_instruction can not well detect the failed instruction emulation. It allows guest to retry all the instructions except it accesses on error pfn For example, some cases are nested-write-protect - if the page we want to write is used as PDE but it chains to itself. Under this case, we should stop the emulation and report the case to userspace Signed-off-by: Xiao Guangrong --- arch/x86/include/asm/kvm_host.h | 7 +++++ arch/x86/kvm/paging_tmpl.h | 24 +++++++++++++----- arch/x86/kvm/x86.c | 50 ++++++++++++++++++++++++-------------- 3 files changed, 55 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c431b33..de229e6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -502,6 +502,13 @@ struct kvm_vcpu_arch { u64 msr_val; struct gfn_to_hva_cache data; } pv_eoi; + + /* + * Indicate whether the gfn is used as page table in guest which + * is set when fix page fault and used to detect unhandeable + * instruction. + */ + bool target_gfn_is_pt; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0453fa0..ca1be75 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -507,20 +507,27 @@ out_gpte_changed: */ static bool FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, - struct guest_walker *walker, int user_fault) + struct guest_walker *walker, int user_fault, + bool *target_gfn_is_pt) { int level; gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); + bool self_changed = false; + + *target_gfn_is_pt = false; if (!(walker->pte_access & ACC_WRITE_MASK || (!is_write_protection(vcpu) && !user_fault))) return false; - for (level = walker->level; level <= walker->max_level; level++) - if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask)) - return true; + for (level = walker->level; level <= walker->max_level; level++) { + gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; + + self_changed |= !(gfn & mask); + *target_gfn_is_pt |= !gfn; + } - return false; + return self_changed; } /* @@ -548,7 +555,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int level = PT_PAGE_TABLE_LEVEL; int force_pt_level; unsigned long mmu_seq; - bool map_writable; + bool map_writable, is_self_change_mapping; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -576,9 +583,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, return 0; } + is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, + &walker, user_fault, &vcpu->arch.target_gfn_is_pt); + if (walker.level >= PT_DIRECTORY_LEVEL) force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) - || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault); + || is_self_change_mapping; else force_pt_level = 1; if (!force_pt_level) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b0a3678..44c6992 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4756,15 +4756,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2) { gpa_t gpa = cr2; + gfn_t gfn; pfn_t pfn; - unsigned int indirect_shadow_pages; - - spin_lock(&vcpu->kvm->mmu_lock); - indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; - spin_unlock(&vcpu->kvm->mmu_lock); - - if (!indirect_shadow_pages) - return false; if (!vcpu->arch.mmu.direct_map) { /* @@ -4781,13 +4774,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2) return true; } - /* - * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-enter the - * guest to let CPU execute the instruction. - */ - if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) - return true; + gfn = gpa_to_gfn(gpa); /* * Do not retry the unhandleable instruction if it faults on the @@ -4795,13 +4782,38 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, unsigned long cr2) * retry instruction -> write #PF -> emulation fail -> retry * instruction -> ... */ - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - if (!is_error_noslot_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn); + + /* + * If the instruction failed on the error pfn, it can not be fixed, + * report the error to userspace. + */ + if (is_error_noslot_pfn(pfn)) + return false; + + kvm_release_pfn_clean(pfn); + + /* The instructions are well-emulated on direct mmu. */ + if (vcpu->arch.mmu.direct_map) { + unsigned int indirect_shadow_pages; + + spin_lock(&vcpu->kvm->mmu_lock); + indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; + spin_unlock(&vcpu->kvm->mmu_lock); + + if (indirect_shadow_pages) + kvm_mmu_unprotect_page(vcpu->kvm, gfn); + return true; } - return false; + kvm_mmu_unprotect_page(vcpu->kvm, gfn); + + /* If the target gfn is used as page table, the fault can + * not be avoided by unprotecting shadow page and it will + * be reported to userspace. + */ + return !vcpu->arch.target_gfn_is_pt; } static bool retry_instruction(struct x86_emulate_ctxt *ctxt,