[1/4] KVM: MMU: Introduce the set_spte_notify() callback

Message ID 20200724235448.106142-2-Eric.VanTassell@amd.com
State New, archived
Series Defer page pinning for SEV guests until guest pages touched

Commit Message

eric van tassell July 24, 2020, 11:54 p.m. UTC
Introduce a generic callback, set_spte_notify(), which is invoked just
before the spte is set.

Since setting the spte can now fail, check the return code at call sites
that previously ignored it.

SVM will use this callback to defer pinning of SEV guest pages until they
are first used. This reduces SEV guest startup time by eliminating the
compute cycles required to pin all the pages up front, and it also reduces
memory pressure from guests that reserve, but only sparsely access, a
large amount of memory.

Signed-off-by: eric van tassell <Eric.VanTassell@amd.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/mmu/mmu.c          | 31 +++++++++++++++++++++++++++----
 arch/x86/kvm/mmu/paging_tmpl.h  | 27 ++++++++++++++++-----------
 3 files changed, 46 insertions(+), 15 deletions(-)
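
For context (not part of this patch): below is a minimal sketch of how a
vendor backend such as SVM might implement the new hook to pin an SEV guest
page the first time it is mapped. sev_guest() is an existing SVM helper, but
the overall shape, including where the pin would be recorded for later
release, is an assumption for illustration only; the actual implementation
arrives later in this series.

static int svm_set_spte_notify(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
			       int level, bool mmio, u64 *spte)
{
	/* Nothing to do for non-SEV guests or for MMIO mappings. */
	if (!sev_guest(vcpu->kvm) || mmio)
		return 0;

	if (WARN_ON_ONCE(!pfn_valid(pfn)))
		return -EINVAL;

	/*
	 * Pin the page so it cannot be migrated or swapped out while the
	 * guest may hold an encrypted mapping to it.  A real backend would
	 * also record the pin so it can be dropped at VM teardown (omitted
	 * in this sketch).
	 */
	get_page(pfn_to_page(pfn));

	return 0;
}

The backend would then presumably register the hook in its kvm_x86_ops
instance, e.g. ".set_spte_notify = svm_set_spte_notify".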

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5aaef036627f..e8c37e28a8ae 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1227,6 +1227,9 @@  struct kvm_x86_ops {
 	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+	int (*set_spte_notify)(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
+			       int level, bool mmio, u64 *spte);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fa506aaaf019..0a6a3df8c8c8 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3004,6 +3004,23 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn,
 			kvm_is_mmio_pfn(pfn));
 
+	if (kvm_x86_ops.set_spte_notify) {
+		ret = kvm_x86_ops.set_spte_notify(vcpu, gfn, pfn, level,
+						  kvm_is_mmio_pfn(pfn), &spte);
+		if (ret) {
+			if (WARN_ON_ONCE(ret > 0))
+				/*
+				 * set_spte_notify() should return 0 on
+				 * success or a negative value on failure.
+				 * WARN on, and convert to -EINVAL, any
+				 * unexpected positive return value.
+				 */
+				ret = -EINVAL;
+
+			return ret;
+		}
+	}
+
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
 	else
@@ -3086,6 +3103,9 @@  static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
 				speculative, true, host_writable);
+	if (set_spte_ret < 0)
+		return set_spte_ret;
+
 	if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) {
 		if (write_fault)
 			ret = RET_PF_EMULATE;
@@ -3134,7 +3154,7 @@  static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 	struct page *pages[PTE_PREFETCH_NUM];
 	struct kvm_memory_slot *slot;
 	unsigned int access = sp->role.access;
-	int i, ret;
+	int i, ret, error_ret = 0;
 	gfn_t gfn;
 
 	gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
@@ -3147,12 +3167,15 @@  static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
 		return -1;
 
 	for (i = 0; i < ret; i++, gfn++, start++) {
-		mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
-			     page_to_pfn(pages[i]), true, true);
+		ret = mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
+				   page_to_pfn(pages[i]), true, true);
+		if (ret < 0 && error_ret == 0) /* track only the first failure */
+			error_ret = ret;
 		put_page(pages[i]);
 	}
 
-	return 0;
+	/* Return non-zero if mapping any gfn failed. */
+	return error_ret;
 }
 
 static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 0172a949f6a7..a777bb43dfa0 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -532,6 +532,7 @@  FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	unsigned pte_access;
 	gfn_t gfn;
 	kvm_pfn_t pfn;
+	int set_spte_ret;
 
 	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 		return false;
@@ -550,11 +551,12 @@  FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 * we call mmu_set_spte() with host_writable = true because
 	 * pte_prefetch_gfn_to_pfn always gets a writable pfn.
 	 */
-	mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn,
-		     true, true);
+	set_spte_ret = mmu_set_spte(vcpu, spte, pte_access, 0,
+				    PG_LEVEL_4K, gfn, pfn, true, true);
 
 	kvm_release_pfn_clean(pfn);
-	return true;
+
+	return set_spte_ret >= 0;
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1011,7 +1013,8 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	int i, nr_present = 0;
 	bool host_writable;
 	gpa_t first_pte_gpa;
-	int set_spte_ret = 0;
+	int ret;
+	int accum_set_spte_flags = 0;
 
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
@@ -1064,17 +1067,19 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 			continue;
 		}
 
-		nr_present++;
-
 		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
-		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
-					 pte_access, PG_LEVEL_4K,
-					 gfn, spte_to_pfn(sp->spt[i]),
-					 true, false, host_writable);
+		ret = set_spte(vcpu, &sp->spt[i], pte_access,
+			       PG_LEVEL_4K, gfn,
+			       spte_to_pfn(sp->spt[i]), true, false,
+			       host_writable);
+		if (ret >= 0) {
+			nr_present++;
+			accum_set_spte_flags |= ret;
+		}
 	}
 
-	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
+	if (accum_set_spte_flags & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 
 	return nr_present;
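
For reviewers, the calling convention this patch establishes can be
summarized as follows (an illustration of the pattern used at the call
sites above, not additional code in the patch): set_spte() now returns
either a negative error propagated from set_spte_notify() or, as before,
a non-negative mask of SET_SPTE_* flags, so callers (and, transitively,
callers of mmu_set_spte()) must check for a negative value before
interpreting the result.

	ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
		       speculative, can_unsync, host_writable);
	if (ret < 0)
		return ret;	/* notifier failed; the spte was not installed */

	if (ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
		kvm_flush_remote_tlbs(vcpu->kvm);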