diff mbox

KVM: MMU: limit rmap chain length

Message ID 20090804121718.GA18476@amt.cnet (mailing list archive)
State New, archived
Headers show

Commit Message

Marcelo Tosatti Aug. 4, 2009, 12:17 p.m. UTC
Otherwise the host can spend too long traversing an rmap chain, which
happens under a spinlock.

Cc: stable@kernel.org
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Avi Kivity Aug. 4, 2009, 12:30 p.m. UTC | #1
On 08/04/2009 03:17 PM, Marcelo Tosatti wrote:
> Otherwise the host can spend too long traversing an rmap chain, which
> happens under a spinlock.
>    

Applied, thanks.
diff mbox

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 24e4188..08b6d98 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -550,16 +550,19 @@  static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
  *
  * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
  * containing more mappings.
+ * 
+ * Returns the number of rmap entries before the spte was added or zero if
+ * the spte was not added.
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
 	unsigned long *rmapp;
-	int i;
+	int i, count = 0;
 
 	if (!is_rmap_spte(*spte))
-		return;
+		return count;
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
@@ -576,8 +579,10 @@  static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	} else {
 		rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
 		desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-		while (desc->sptes[RMAP_EXT-1] && desc->more)
+		while (desc->sptes[RMAP_EXT-1] && desc->more) {
 			desc = desc->more;
+			count += RMAP_EXT;
+		}
 		if (desc->sptes[RMAP_EXT-1]) {
 			desc->more = mmu_alloc_rmap_desc(vcpu);
 			desc = desc->more;
@@ -586,6 +591,7 @@  static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 			;
 		desc->sptes[i] = spte;
 	}
+	return count;
 }
 
 static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -822,6 +828,22 @@  static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	return young;
 }
 
+#define RMAP_RECYCLE_THRESHOLD 1000
+
+static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+{
+	unsigned long *rmapp;
+	struct kvm_mmu_page *sp;
+
+	sp = page_header(__pa(spte));
+
+	gfn = unalias_gfn(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
+
+	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+	kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
 	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1809,6 +1831,7 @@  static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 {
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*sptep);
+	int rmap_count;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
@@ -1852,9 +1875,11 @@  static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	page_header_update_slot(vcpu->kvm, sptep, gfn);
 	if (!was_rmapped) {
-		rmap_add(vcpu, sptep, gfn);
+		rmap_count = rmap_add(vcpu, sptep, gfn);
 		if (!is_rmap_spte(*sptep))
 			kvm_release_pfn_clean(pfn);
+		if (rmap_count > RMAP_RECYCLE_THRESHOLD)
+			rmap_recycle(vcpu, sptep, gfn);
 	} else {
 		if (was_writeble)
 			kvm_release_pfn_dirty(pfn);