diff mbox

[5/6] KVM: MMU: prefetch ptes when intercepted guest #PF

Message ID 4C16E9A8.10409@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong June 15, 2010, 2:47 a.m. UTC
None
diff mbox

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 92ff099..941c86b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -89,6 +89,8 @@  module_param(oos_shadow, bool, 0644);
 	}
 #endif
 
+#define PTE_PREFETCH_NUM	16
+
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
@@ -2041,6 +2043,54 @@  static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
+/*
+ * Opportunistically map the entries that follow @sptep in its direct
+ * shadow page, examining at most PTE_PREFETCH_NUM - 1 of them and never
+ * running past the end of the page.
+ *
+ * Only entries still equal to shadow_trap_nonpresent_pte are filled.  The
+ * walk stops at the first gfn whose pfn cannot be obtained atomically, or
+ * as soon as pte_prefetch_topup_memory_cache() returns non-zero.
+ */
+static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+	struct kvm_mmu_page *sp;
+	int index, end, i;
+
+	sp = page_header(__pa(sptep));
+	WARN_ON(!sp->role.direct);
+	index = sptep - sp->spt;
+	end = min(PT64_ENT_PER_PAGE, index + PTE_PREFETCH_NUM);
+
+	for (i = index + 1; i < end; i++) {
+		gfn_t gfn;
+		pfn_t pfn;
+		u64 *spte = sp->spt + i;
+
+		if (*spte != shadow_trap_nonpresent_pte)
+			continue;
+
+		/* First gfn mapped by entry i of this shadow page. */
+		gfn = sp->gfn + ((gfn_t)i <<
+				 ((sp->role.level - 1) * PT64_LEVEL_BITS));
+
+		pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
+		if (is_error_pfn(pfn)) {
+			kvm_release_pfn_clean(pfn);
+			break;
+		}
+
+		/* The pfn is not handed to mmu_set_spte(); release it here. */
+		if (pte_prefetch_topup_memory_cache(vcpu)) {
+			kvm_release_pfn_clean(pfn);
+			break;
+		}
+
+		mmu_set_spte(vcpu, spte, ACC_ALL, ACC_ALL, 0, 0, 1, NULL,
+			     sp->role.level, gfn, pfn, true, false);
+	}
+}
+
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			int level, gfn_t gfn, pfn_t pfn)
 {
@@ -2055,6 +2090,7 @@  static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 				     0, write, 1, &pt_write,
 				     level, gfn, pfn, false, true);
 			++vcpu->stat.pf_fixed;
+			direct_pte_prefetch(vcpu, iterator.sptep);
 			break;
 		}
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index eb47148..af4e041 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -291,6 +291,107 @@  static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		     gpte_to_gfn(gpte), pfn, true, true);
 }
 
+/*
+ * Opportunistically map the entries that follow @sptep in its shadow page,
+ * examining at most PTE_PREFETCH_NUM - 1 of them and never running past
+ * the end of the page.
+ *
+ * Only entries still equal to shadow_trap_nonpresent_pte are considered.
+ * For a direct shadow page the gfn is derived from sp->gfn; otherwise the
+ * guest page table at sp->gfn is mapped on first use and the guest pte is
+ * consulted: ptes without the accessed bit are skipped, and the remaining
+ * non-present ones are recorded as shadow_notrap_nonpresent_pte unless the
+ * page is unsync.
+ *
+ * The walk stops at the first page or pfn that cannot be obtained
+ * atomically, or as soon as pte_prefetch_topup_memory_cache() returns
+ * non-zero.
+ */
+static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+	struct kvm_mmu_page *sp;
+	struct page *page = NULL;
+	pt_element_t *table = NULL;
+	int offset = 0, shift, index, end, i;
+
+	sp = page_header(__pa(sptep));
+	index = sptep - sp->spt;
+	end = min(PT64_ENT_PER_PAGE, index + PTE_PREFETCH_NUM);
+
+	if (PTTYPE == 32) {
+		/* Byte offset of this quadrant within the guest table page. */
+		shift = PAGE_SHIFT - (PT_LEVEL_BITS -
+					PT64_LEVEL_BITS) * sp->role.level;
+		offset = sp->role.quadrant << shift;
+	}
+
+	for (i = index + 1; i < end; i++) {
+		pt_element_t gpte;
+		unsigned pte_access;
+		u64 *spte = sp->spt + i;
+		gfn_t gfn;
+		pfn_t pfn;
+		int dirty;
+
+		if (*spte != shadow_trap_nonpresent_pte)
+			continue;
+
+		pte_access = sp->role.access;
+		if (sp->role.direct) {
+			dirty = 1;
+			gfn = sp->gfn + ((gfn_t)i <<
+				((sp->role.level - 1) * PT64_LEVEL_BITS));
+		} else {
+			if (!table) {
+				page = gfn_to_page_atomic(vcpu->kvm, sp->gfn);
+				if (is_error_page(page)) {
+					kvm_release_page_clean(page);
+					break;
+				}
+				table = kmap_atomic(page, KM_USER0);
+				table = (pt_element_t *)((char *)table +
+							 offset);
+			}
+
+			gpte = table[i];
+			if (!(gpte & PT_ACCESSED_MASK))
+				continue;
+
+			if (!is_present_gpte(gpte)) {
+				if (!sp->unsync)
+					*spte = shadow_notrap_nonpresent_pte;
+				continue;
+			}
+			dirty = is_dirty_gpte(gpte);
+			/* Same gpte -> gfn conversion as FNAME(update_pte). */
+			gfn = gpte_to_gfn(gpte);
+			pte_access &= FNAME(gpte_access)(vcpu, gpte);
+		}
+
+		pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
+		if (is_error_pfn(pfn)) {
+			kvm_release_pfn_clean(pfn);
+			break;
+		}
+
+		/* The pfn is not handed to mmu_set_spte(); release it here. */
+		if (pte_prefetch_topup_memory_cache(vcpu)) {
+			kvm_release_pfn_clean(pfn);
+			break;
+		}
+
+		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+			     dirty, NULL, sp->role.level, gfn, pfn,
+			     true, false);
+	}
+
+	if (table) {
+		kunmap_atomic((char *)table - offset, KM_USER0);
+		/* Drop the reference taken by gfn_to_page_atomic(). */
+		kvm_release_page_clean(page);
+	}
+}
+
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
@@ -322,6 +397,7 @@  static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 				     is_dirty_gpte(gw->ptes[gw->level-1]),
 				     ptwrite, level,
 				     gw->gfn, pfn, false, true);
+			FNAME(pte_prefetch)(vcpu, sptep);
 			break;
 		}