From patchwork Fri Mar 27 04:19:09 2009
X-Patchwork-Submitter: "Dong, Eddie"
X-Patchwork-Id: 14664
From: "Dong, Eddie"
To: "Dong, Eddie", "kvm@vger.kernel.org", Avi Kivity
CC: "Dong, Eddie"
Date: Fri, 27 Mar 2009 12:19:09 +0800
Subject: RFC: Add reserved bits check
Message-ID: <9832F13BD22FB94A829F798DA4A8280501A2106E6A@pdsmsx503.ccr.corp.intel.com>
In-Reply-To: <9832F13BD22FB94A829F798DA4A8280501A21068EF@pdsmsx503.ccr.corp.intel.com>

KVM currently does not check the reserved bits of guest page-table
entries, so under VMX a guest entry with reserved bits set can bypass
the #PF the architecture requires it to raise. This patch adds the
check and leaves the shadow pte unconstructed when the guest entry
has RSVD=1. Comments?
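For reference, here is a minimal userspace sketch of the mask arithmetic
the patch relies on: rsvd_bits() is copied from the hunk below, while the
maxphyaddr value and the sample pte are made up for illustration.

/* Standalone demo of the reserved-bits mask arithmetic; rsvd_bits()
 * mirrors the helper added below, everything else is illustrative. */
#include <stdio.h>
#include <stdint.h>

static inline uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	int maxphyaddr = 40;	/* as reported by CPUID 0x80000008 EAX[7:0] */
	/* 4K PTE mask for 4-level paging, as in paging64_init_context() */
	uint64_t mask = rsvd_bits(maxphyaddr, 51);
	/* guest PTE with a reserved physical-address bit (45) set */
	uint64_t pte = (1ULL << 45) | 0x3;	/* present + writable */

	printf("rsvd mask 0x%016llx\n", (unsigned long long)mask);
	printf("pte       0x%016llx -> rsvd fault: %d\n",
	       (unsigned long long)pte, (pte & mask) != 0);
	return 0;
}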
Thx, eddie

---

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..9370ff0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,8 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;

 	u64 *pae_root;
+	u64 rsvd_bits_mask[4];
+	u64 large_page_rsvd_mask;
 };

 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 31ba3cb..7f55c4a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -127,6 +127,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)

 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
+extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(
+	struct kvm_vcpu *vcpu, u32 function, u32 index);
+
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}

 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -251,6 +259,18 @@ static int is_rmap_pte(u64 pte)
 	return is_shadow_present_pte(pte);
 }

+static int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	u32 function = 0x80000008;
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, function, 0);
+	if (best)
+		return best->eax & 0xff;
+	/* no leaf 0x80000008: assume 40 physical address bits */
+	return 40;
+}
+
 static pfn_t spte_to_pfn(u64 pte)
 {
 	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -2156,6 +2176,17 @@ static void paging_free(struct kvm_vcpu *vcpu)
 	nonpaging_free(vcpu);
 }

+static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 pte, int level)
+{
+	if (level == PT_DIRECTORY_LEVEL && (pte & PT_PAGE_SIZE_MASK))
+		/* large page */
+		return (pte & vcpu->arch.mmu.large_page_rsvd_mask) != 0;
+
+	/* 4K page */
+	return (pte & vcpu->arch.mmu.rsvd_bits_mask[level - 1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)

 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	context->rsvd_bits_mask[3] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
+	context->large_page_rsvd_mask =		/* 2MB PDE */
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }

@@ -2191,6 +2234,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;

+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0] = 0;
+	context->rsvd_bits_mask[1] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->large_page_rsvd_mask = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->large_page_rsvd_mask = rsvd_bits(13, 21);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)

 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	/* 3 levels */
+	context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
+		rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
+	context->rsvd_bits_mask[1] = rsvd_bits(maxphyaddr, 63);	/* PDE */
+	context->rsvd_bits_mask[0] =	/* PTE */
+		rsvd_bits(maxphyaddr, 63) | rsvd_bits(7, 8) | rsvd_bits(1, 2);
+	context->large_page_rsvd_mask =	/* 2M page */
+		rsvd_bits(maxphyaddr, 63) | rsvd_bits(13, 20);
+
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..844efe9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
+	int rsvd_fault;

 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -153,10 +154,13 @@ walk:
 			walker->level - 1, table_gfn);

 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);

 		if (!is_present_pte(pte))
 			goto not_present;

+		if (rsvd_fault)
+			goto access_error;
+
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -233,6 +237,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
 	return 0;
 }
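As a usage note (not part of the patch): a reserved-bit violation takes
the access_error path above, so the error code the guest sees has both
the present and rsvd bits set. A hypothetical decoder, with the
PFERR_*_MASK values copied from mmu.c and a made-up sample fault:

/* Hypothetical decoder for the #PF error code assembled at the err:
 * label above; mask values match the PFERR_*_MASK defines in mmu.c. */
#include <stdio.h>
#include <stdint.h>

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK   (1U << 1)
#define PFERR_USER_MASK    (1U << 2)
#define PFERR_RSVD_MASK    (1U << 3)
#define PFERR_FETCH_MASK   (1U << 4)

static void decode_pf_error_code(uint32_t ec)
{
	printf("error code 0x%02x:%s%s%s%s%s\n", ec,
	       ec & PFERR_PRESENT_MASK ? " present" : " not-present",
	       ec & PFERR_WRITE_MASK ? " write" : " read",
	       ec & PFERR_USER_MASK ? " user" : " supervisor",
	       ec & PFERR_RSVD_MASK ? " rsvd" : "",
	       ec & PFERR_FETCH_MASK ? " fetch" : "");
}

int main(void)
{
	/* e.g. a user-mode write that hit a reserved-bit PTE:
	 * present | write | user | rsvd = 0x0f */
	decode_pf_error_code(PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
			     PFERR_USER_MASK | PFERR_RSVD_MASK);
	return 0;
}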