@@ -211,12 +211,14 @@ static int gzvm_vm_ioctl_get_pvmfw_size(struct gzvm *gzvm,
* @gfn: Guest frame number.
* @total_pages: Total page numbers.
* @slot: Pointer to struct gzvm_memslot.
+ * @gzvm: Pointer to struct gzvm.
*
* Return: how many pages we've filled in, negative if error
*/
static int fill_constituents(struct mem_region_addr_range *consti,
int *consti_cnt, int max_nr_consti, u64 gfn,
- u32 total_pages, struct gzvm_memslot *slot)
+ u32 total_pages, struct gzvm_memslot *slot,
+ struct gzvm *gzvm)
{
u64 pfn = 0, prev_pfn = 0, gfn_end = 0;
int nr_pages = 0;
@@ -227,6 +229,8 @@ static int fill_constituents(struct mem_region_addr_range *consti,
gfn_end = gfn + total_pages;
while (i < max_nr_consti && gfn < gfn_end) {
+ if (gzvm_vm_allocate_guest_page(gzvm, slot, gfn, &pfn) != 0)
+ return -EFAULT;
if (pfn == (prev_pfn + 1)) {
consti[i].pg_cnt++;
} else {
@@ -282,7 +286,7 @@ int gzvm_vm_populate_mem_region(struct gzvm *gzvm, int slot_id)
nr_pages = fill_constituents(region->constituents,
&region->constituent_cnt,
max_nr_consti, gfn,
- remain_pages, memslot);
+ remain_pages, memslot, gzvm);
if (nr_pages < 0) {
pr_err("Failed to fill constituents\n");
@@ -8,4 +8,4 @@ GZVM_DIR ?= ../../../drivers/virt/geniezone
gzvm-y := $(GZVM_DIR)/gzvm_main.o $(GZVM_DIR)/gzvm_vm.o \
$(GZVM_DIR)/gzvm_vcpu.o $(GZVM_DIR)/gzvm_irqfd.o \
- $(GZVM_DIR)/gzvm_ioeventfd.o
+ $(GZVM_DIR)/gzvm_ioeventfd.o $(GZVM_DIR)/gzvm_mmu.o
new file mode 100644
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ */
+
+#include <linux/soc/mediatek/gzvm_drv.h>
+
+static int cmp_ppages(struct rb_node *node, const struct rb_node *parent)
+{
+ struct gzvm_pinned_page *a = container_of(node,
+ struct gzvm_pinned_page,
+ node);
+ struct gzvm_pinned_page *b = container_of(parent,
+ struct gzvm_pinned_page,
+ node);
+
+ if (a->ipa < b->ipa)
+ return -1;
+ if (a->ipa > b->ipa)
+ return 1;
+ return 0;
+}
+
+/* Invoker of this function is responsible for locking */
+static int gzvm_insert_ppage(struct gzvm *vm, struct gzvm_pinned_page *ppage)
+{
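+	/*
+	 * rb_find_add() returns the existing node if the key is already
+	 * present, or NULL after a successful insertion.
+	 */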
+ if (rb_find_add(&ppage->node, &vm->pinned_pages, cmp_ppages))
+ return -EEXIST;
+ return 0;
+}
+
+static int pin_one_page(struct gzvm *vm, unsigned long hva, u64 gpa,
+ struct page **out_page)
+{
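+	/*
+	 * FOLL_LONGTERM: the pin may be held indefinitely, so gup first
+	 * migrates the page out of movable/CMA regions. FOLL_HWPOISON:
+	 * fail instead of pinning a hardware-poisoned page.
+	 */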
+ unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+ struct gzvm_pinned_page *ppage = NULL;
+ struct mm_struct *mm = current->mm;
+ struct page *page = NULL;
+ int ret;
+
+ ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+ if (!ppage)
+ return -ENOMEM;
+
+ mmap_read_lock(mm);
+ ret = pin_user_pages(hva, 1, flags, &page);
+ mmap_read_unlock(mm);
+
+ if (ret != 1 || !page) {
+ kfree(ppage);
+ return -EFAULT;
+ }
+
+ ppage->page = page;
+ ppage->ipa = gpa;
+
+ mutex_lock(&vm->mem_lock);
+ ret = gzvm_insert_ppage(vm, ppage);
+
+	/*
+	 * A return of -EEXIST from gzvm_insert_ppage() is expected here:
+	 * it happens when two or more vCPUs handle demand paging
+	 * concurrently. The first vCPU has already allocated and pinned
+	 * the page, and a later vCPU tries to pin the same page again.
+	 * In that case we drop the extra pin, free the freshly allocated
+	 * structure, and return 0.
+	 */
+ if (ret == -EEXIST) {
+ kfree(ppage);
+ unpin_user_pages(&page, 1);
+ ret = 0;
+ }
+ mutex_unlock(&vm->mem_lock);
+ *out_page = page;
+
+ return ret;
+}
+
+int gzvm_vm_allocate_guest_page(struct gzvm *vm, struct gzvm_memslot *slot,
+ u64 gfn, u64 *pfn)
+{
+ struct page *page = NULL;
+ unsigned long hva;
+ int ret;
+
+ if (gzvm_gfn_to_hva_memslot(slot, gfn, (u64 *)&hva) != 0)
+ return -EINVAL;
+
+ ret = pin_one_page(vm, hva, PFN_PHYS(gfn), &page);
+ if (ret != 0)
+ return ret;
+
+ if (page == NULL)
+ return -EFAULT;
+	/*
+	 * pin_user_pages() already gave us the struct page, so convert it
+	 * to a pfn directly rather than calling another lookup API.
+	 */
+ *pfn = page_to_pfn(page);
+
+ return 0;
+}
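
To show how the new helper is meant to be consumed, here is a minimal sketch of a stage-2 demand-paging path. gzvm_find_memslot() and gzvm_arch_map_guest() are assumptions for illustration, not functions introduced by this patch:

static int gzvm_demand_page_sketch(struct gzvm *vm, u64 gfn)
{
	/* Hypothetical slot lookup; not part of this patch. */
	struct gzvm_memslot *slot = gzvm_find_memslot(vm, gfn);
	u64 pfn;
	int ret;

	if (!slot)
		return -EFAULT;

	/* Pin the backing page and record it in vm->pinned_pages. */
	ret = gzvm_vm_allocate_guest_page(vm, slot, gfn, &pfn);
	if (ret)
		return ret;

	/* Assumed hypercall wrapper that maps gfn -> pfn for the guest. */
	return gzvm_arch_map_guest(vm->vm_id, pfn, gfn, 1);
}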
@@ -298,6 +298,22 @@ static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl,
return ret;
}
+/* Invoker of this function is responsible for locking */
+static void gzvm_destroy_all_ppage(struct gzvm *gzvm)
+{
+ struct gzvm_pinned_page *ppage;
+ struct rb_node *node;
+
+ node = rb_first(&gzvm->pinned_pages);
+ while (node) {
+ ppage = rb_entry(node, struct gzvm_pinned_page, node);
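+		/*
+		 * Unpin with dirty=true: the guest may have written the
+		 * page through its stage-2 mapping.
+		 */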
+ unpin_user_pages_dirty_lock(&ppage->page, 1, true);
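+		/* Fetch the successor before rb_erase() invalidates @node. */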
+ node = rb_next(node);
+ rb_erase(&ppage->node, &gzvm->pinned_pages);
+ kfree(ppage);
+ }
+}
+
static void gzvm_destroy_vm(struct gzvm *gzvm)
{
pr_debug("VM-%u is going to be destroyed\n", gzvm->vm_id);
@@ -314,6 +330,9 @@ static void gzvm_destroy_vm(struct gzvm *gzvm)
mutex_unlock(&gzvm->lock);
+	/* No need to lock here because it's single-threaded execution */
+ gzvm_destroy_all_ppage(gzvm);
+
kfree(gzvm);
}
@@ -349,6 +368,8 @@ static struct gzvm *gzvm_create_vm(unsigned long vm_type)
gzvm->vm_id = ret;
gzvm->mm = current->mm;
mutex_init(&gzvm->lock);
+ mutex_init(&gzvm->mem_lock);
+ gzvm->pinned_pages = RB_ROOT;
ret = gzvm_vm_irqfd_init(gzvm);
if (ret) {
@@ -12,6 +12,7 @@
#include <linux/mutex.h>
#include <linux/gzvm.h>
#include <linux/srcu.h>
+#include <linux/rbtree.h>
/*
* For the normal physical address, the highest 12 bits should be zero, so we
@@ -97,6 +98,12 @@ struct gzvm_vcpu {
struct gzvm_vcpu_hwstate *hwstate;
};
+struct gzvm_pinned_page {
+ struct rb_node node;
+ struct page *page;
+ u64 ipa;
+};
+
/**
* struct gzvm: the following data structures are for data transferring between
* driver and hypervisor, and they're aligned with hypervisor definitions.
@@ -112,6 +119,8 @@ struct gzvm_vcpu {
* @irq_ack_notifier_list: list head for irq ack notifier
* @irq_srcu: structure data for SRCU(sleepable rcu)
* @irq_lock: lock for irq injection
+ * @pinned_pages: rb-tree recording the pages pinned for this VM
+ * @mem_lock: lock for memory operations
*/
struct gzvm {
struct gzvm_vcpu *vcpus[GZVM_MAX_VCPUS];
@@ -135,6 +144,9 @@ struct gzvm {
struct hlist_head irq_ack_notifier_list;
struct srcu_struct irq_srcu;
struct mutex irq_lock;
+
+ struct rb_root pinned_pages;
+ struct mutex mem_lock;
};
long gzvm_dev_ioctl_check_extension(struct gzvm *gzvm, unsigned long args);
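
A usage sketch for the new pinned_pages tree (not part of the patch): since entries are keyed by ipa, a lookup helper can pair rb_find() with a key comparator that mirrors cmp_ppages(); gzvm_find_ppage() below is hypothetical:

static int cmp_ppage_key(const void *key, const struct rb_node *node)
{
	u64 ipa = *(const u64 *)key;
	const struct gzvm_pinned_page *p =
		rb_entry(node, struct gzvm_pinned_page, node);

	if (ipa < p->ipa)
		return -1;
	if (ipa > p->ipa)
		return 1;
	return 0;
}

static struct gzvm_pinned_page *gzvm_find_ppage(struct gzvm *vm, u64 ipa)
{
	struct rb_node *node;

	/* Callers must hold vm->mem_lock, as with gzvm_insert_ppage(). */
	lockdep_assert_held(&vm->mem_lock);
	node = rb_find(&ipa, &vm->pinned_pages, cmp_ppage_key);
	return node ? rb_entry(node, struct gzvm_pinned_page, node) : NULL;
}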
@@ -160,6 +172,8 @@ int gzvm_vm_ioctl_arch_enable_cap(struct gzvm *gzvm,
int gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn,
u64 *hva_memslot);
int gzvm_vm_populate_mem_region(struct gzvm *gzvm, int slot_id);
+int gzvm_vm_allocate_guest_page(struct gzvm *gzvm, struct gzvm_memslot *slot,
+ u64 gfn, u64 *pfn);
int gzvm_vm_ioctl_create_vcpu(struct gzvm *gzvm, u32 cpuid);
int gzvm_arch_vcpu_update_one_reg(struct gzvm_vcpu *vcpu, __u64 reg_id,