@@ -865,10 +865,13 @@ struct kvm_lpage_info {
int disallow_lpage;
};
+bool kvm_host_write_track_is_active(struct kvm *kvm, gfn_t gfn);
+
struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+ unsigned short *host_write_track[KVM_PAGE_TRACK_MAX];
};
/*
@@ -1393,6 +1396,9 @@ struct kvm_x86_ops {
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
int (*get_tdp_max_page_level)(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level);
+
+ void (*write_page_begin)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
+ void (*write_page_end)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
};
struct kvm_x86_nested_ops {
@@ -2862,6 +2862,19 @@ static int snp_make_page_shared(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn,
return rmpupdate(pfn_to_page(pfn), &val);
}
+static inline bool kvm_host_write_track_gpa_range_is_active(struct kvm *kvm,
+ gpa_t start, gpa_t end)
+{
+ while (start < end) {
+ if (kvm_host_write_track_is_active(kvm, gpa_to_gfn(start)))
+ return 1;
+
+ start += PAGE_SIZE;
+ }
+
+ return false;
+}
+
static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn, int level)
{
struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
@@ -2873,6 +2886,14 @@ static int snp_make_page_private(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_pfn_t pfn
if (!e)
return -EINVAL;
+ /*
+ * If the GPA is tracked for the write access then do not change the
+ * page state from shared to private.
+ */
+ if (kvm_host_write_track_gpa_range_is_active(vcpu->kvm,
+ gpa, gpa + page_level_size(level)))
+ return -EBUSY;
+
/* Log if the entry is validated */
if (rmpentry_validated(e))
pr_warn_ratelimited("Asked to make a pre-validated gpa %llx private\n", gpa);
@@ -3446,3 +3467,33 @@ int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level)
return min_t(uint32_t, level, max_level);
}
+
+void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ struct rmpentry *e;
+ int level, rc;
+ kvm_pfn_t pfn;
+
+ if (!sev_snp_guest(kvm))
+ return;
+
+ pfn = gfn_to_pfn(kvm, gfn);
+ if (is_error_noslot_pfn(pfn))
+ return;
+
+ e = snp_lookup_page_in_rmptable(pfn_to_page(pfn), &level);
+ if (unlikely(!e))
+ return;
+
+ /*
+ * A hypervisor should never write to the guest private page. A write to the
+ * guest private will cause an RMP violation. If the guest page is private,
+ * then make it shared.
+ */
+ if (rmpentry_assigned(e)) {
+ pr_err("SEV-SNP: write to guest private gfn %llx\n", gfn);
+ rc = snp_make_page_shared(kvm_get_vcpu(kvm, 0),
+ gfn << PAGE_SHIFT, pfn, PG_LEVEL_4K);
+ BUG_ON(rc != 0);
+ }
+}
@@ -4577,6 +4577,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.alloc_apic_backing_page = svm_alloc_apic_backing_page,
.get_tdp_max_page_level = sev_get_tdp_max_page_level,
+
+ .write_page_begin = sev_snp_write_page_begin,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
@@ -576,6 +576,7 @@ void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
int sev_get_tdp_max_page_level(struct kvm_vcpu *vcpu, gpa_t gpa, int max_level);
+void sev_snp_write_page_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
/* vmenter.S */
@@ -9076,6 +9076,48 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
}
+static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
+ enum kvm_page_track_mode mode, short count)
+{
+ int index, val;
+
+ index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
+
+ val = slot->arch.host_write_track[mode][index];
+
+ if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+ return;
+
+ slot->arch.host_write_track[mode][index] += count;
+}
+
+bool kvm_host_write_track_is_active(struct kvm *kvm, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+ int index;
+
+ slot = gfn_to_memslot(kvm, gfn);
+ if (!slot)
+ return false;
+
+ index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
+ return !!READ_ONCE(slot->arch.host_write_track[KVM_PAGE_TRACK_WRITE][index]);
+}
+EXPORT_SYMBOL_GPL(kvm_host_write_track_is_active);
+
+void kvm_arch_write_gfn_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ update_gfn_track(slot, gfn, KVM_PAGE_TRACK_WRITE, 1);
+
+ if (kvm_x86_ops.write_page_begin)
+ kvm_x86_ops.write_page_begin(kvm, slot, gfn);
+}
+
+void kvm_arch_write_gfn_end(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ update_gfn_track(slot, gfn, KVM_PAGE_TRACK_WRITE, -1);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -10896,6 +10938,36 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_hv_destroy_vm(kvm);
}
+static void kvm_write_page_track_free_memslot(struct kvm_memory_slot *slot)
+{
+ int i;
+
+ for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+ kvfree(slot->arch.host_write_track[i]);
+ slot->arch.host_write_track[i] = NULL;
+ }
+}
+
+static int kvm_write_page_track_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ int i;
+
+ for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
+ slot->arch.host_write_track[i] =
+ kvcalloc(npages, sizeof(*slot->arch.host_write_track[i]),
+ GFP_KERNEL_ACCOUNT);
+ if (!slot->arch.host_write_track[i])
+ goto track_free;
+ }
+
+ return 0;
+
+track_free:
+ kvm_write_page_track_free_memslot(slot);
+ return -ENOMEM;
+}
+
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
int i;
@@ -10969,8 +11041,14 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
if (kvm_page_track_create_memslot(slot, npages))
goto out_free;
+ if (kvm_write_page_track_create_memslot(slot, npages))
+ goto e_free_page_track;
+
return 0;
+e_free_page_track:
+ kvm_page_track_free_memslot(slot);
+
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
kvfree(slot->arch.rmap[i]);
@@ -1550,6 +1550,9 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end);
+void kvm_arch_write_gfn_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
+void kvm_arch_write_gfn_end(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn);
+
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
#else
@@ -160,6 +160,14 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}
+__weak void kvm_arch_write_gfn_begin(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
+{
+}
+
+__weak void kvm_arch_write_gfn_end(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn)
+{
+}
+
bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
{
/*
@@ -2309,7 +2317,8 @@ static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
cache->generation = gen;
}
-static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
+static int __kvm_map_gfn(struct kvm *kvm,
+ struct kvm_memslots *slots, gfn_t gfn,
struct kvm_host_map *map,
struct gfn_to_pfn_cache *cache,
bool atomic)
@@ -2361,20 +2370,22 @@ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
map->pfn = pfn;
map->gfn = gfn;
+ kvm_arch_write_gfn_begin(kvm, slot, map->gfn);
+
return 0;
}
int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
struct gfn_to_pfn_cache *cache, bool atomic)
{
- return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
+ return __kvm_map_gfn(vcpu->kvm, kvm_memslots(vcpu->kvm), gfn, map,
cache, atomic);
}
EXPORT_SYMBOL_GPL(kvm_map_gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
{
- return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
+ return __kvm_map_gfn(vcpu->kvm, kvm_vcpu_memslots(vcpu), gfn, map,
NULL, false);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_map);
@@ -2412,6 +2423,8 @@ static void __kvm_unmap_gfn(struct kvm *kvm,
else
kvm_release_pfn(map->pfn, dirty, NULL);
+ kvm_arch_write_gfn_end(kvm, memslot, map->gfn);
+
map->hva = NULL;
map->page = NULL;
}
@@ -2612,7 +2625,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
addr = gfn_to_hva_memslot(memslot, gfn);
if (kvm_is_error_hva(addr))
return -EFAULT;
+ kvm_arch_write_gfn_begin(kvm, memslot, gfn);
r = __copy_to_user((void __user *)addr + offset, data, len);
+ kvm_arch_write_gfn_end(kvm, memslot, gfn);
if (r)
return -EFAULT;
mark_page_dirty_in_slot(kvm, memslot, gfn);
The kvm_write_guest{_page} and kvm_vcpu_write_guest{_page} are used by the hypevisor to write to the guest memory. The kvm_vcpu_map() and kvm_map_gfn() are used by the hypervisor to map the guest memory and and access it later. When SEV-SNP is enabled in the guest VM, the guest memory pages can either be a private or shared. A write from the hypervisor goes through the RMP checks. If hardware sees that hypervisor is attempting to write to a guest private page, then it triggers an RMP violation (i.e, #PF with RMP bit set). Enhance the KVM guest write helpers to invoke an architecture specific hooks (kvm_arch_write_gfn_{begin,end}) to track the write access from the hypervisor. When SEV-SNP is enabled, the guest uses the PAGE_STATE vmgexit to ask the hypervisor to change the page state from shared to private or vice versa. While changing the page state to private, use the kvm_host_write_track_is_active() to check whether the page is being tracked for the host write access (i.e either mapped or kvm_write_guest is in progress). If its tracked, then do not change the page state. Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> --- arch/x86/include/asm/kvm_host.h | 6 +++ arch/x86/kvm/svm/sev.c | 51 +++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 2 + arch/x86/kvm/svm/svm.h | 1 + arch/x86/kvm/x86.c | 78 +++++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 3 ++ virt/kvm/kvm_main.c | 21 +++++++-- 7 files changed, 159 insertions(+), 3 deletions(-)