@@ -211,12 +211,14 @@ static int gzvm_vm_ioctl_get_pvmfw_size(struct gzvm *gzvm,
* @gfn: Guest frame number.
* @total_pages: Total page numbers.
* @slot: Pointer to struct gzvm_memslot.
+ * @gzvm: Pointer to struct gzvm.
*
* Return: how many pages we've fill in, negative if error
*/
static int fill_constituents(struct mem_region_addr_range *consti,
int *consti_cnt, int max_nr_consti, u64 gfn,
- u32 total_pages, struct gzvm_memslot *slot)
+ u32 total_pages, struct gzvm_memslot *slot,
+ struct gzvm *gzvm)
{
u64 pfn = 0, prev_pfn = 0, gfn_end = 0;
int nr_pages = 0;
@@ -227,7 +229,7 @@ static int fill_constituents(struct mem_region_addr_range *consti,
gfn_end = gfn + total_pages;
while (i < max_nr_consti && gfn < gfn_end) {
- if (gzvm_vm_allocate_guest_page(slot, gfn, &pfn) != 0)
+ if (gzvm_vm_allocate_guest_page(gzvm, slot, gfn, &pfn) != 0)
return -EFAULT;
if (pfn == (prev_pfn + 1)) {
consti[i].pg_cnt++;
@@ -284,7 +286,7 @@ int gzvm_vm_populate_mem_region(struct gzvm *gzvm, int slot_id)
nr_pages = fill_constituents(region->constituents,
®ion->constituent_cnt,
max_nr_consti, gfn,
- remain_pages, memslot);
+ remain_pages, memslot, gzvm);
if (nr_pages < 0) {
pr_err("Failed to fill constituents\n");
@@ -108,11 +108,88 @@ int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn,
return 0;
}
-int gzvm_vm_allocate_guest_page(struct gzvm_memslot *slot, u64 gfn, u64 *pfn)
+/*
+ * rb_find_add() comparator: order two pinned pages by ascending ipa.
+ */
+static int cmp_ppages(struct rb_node *node, const struct rb_node *parent)
{
+	struct gzvm_pinned_page *lhs, *rhs;
+
+	lhs = rb_entry(node, struct gzvm_pinned_page, node);
+	rhs = rb_entry(parent, struct gzvm_pinned_page, node);
+
+	/* Explicit three-way result; the keys are compared, never subtracted. */
+	if (lhs->ipa == rhs->ipa)
+		return 0;
+	return lhs->ipa < rhs->ipa ? -1 : 1;
+}
+
+/* Link @ppage into @vm's pinned-page tree; the caller takes the lock. */
+static int gzvm_insert_ppage(struct gzvm *vm, struct gzvm_pinned_page *ppage)
+{
+	/* A non-NULL result is the node already stored under the same key. */
+	return rb_find_add(&ppage->node, &vm->pinned_pages, cmp_ppages) ?
+	       -EEXIST : 0;
+}
+
+/*
+ * Long-term pin the user page at @hva and track it in @vm keyed by @gpa.
+ * Return: 0 on success or if @gpa is already tracked, else a negative errno.
+ */
+static int pin_one_page(struct gzvm *vm, unsigned long hva, u64 gpa)
+{
+	unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
+	struct mm_struct *mm = current->mm;
+	struct gzvm_pinned_page *ppage;
+	struct page *page = NULL;
+	int ret;
+
+	ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+	if (!ppage)
+		return -ENOMEM;
+
+	mmap_read_lock(mm);
+	ret = pin_user_pages(hva, 1, flags, &page);
+	mmap_read_unlock(mm);
+
+	/* Judge success by the pinned count and keep the real errno. */
+	if (ret != 1) {
+		kfree(ppage);
+		return ret < 0 ? ret : -EFAULT;
+	}
+
+	ppage->page = page;
+	ppage->ipa = gpa;
+
+	mutex_lock(&vm->mem_lock);
+	ret = gzvm_insert_ppage(vm, ppage);
+	mutex_unlock(&vm->mem_lock);
+
+	/*
+	 * -EEXIST is expected: another VCPU handling the same demand-paging
+	 * fault already pinned this page, so drop our pin and report success.
+	 */
+	if (ret == -EEXIST) {
+		unpin_user_pages(&page, 1);
+		kfree(ppage);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/* Fill *@pfn via gzvm_gfn_to_pfn_memslot(), then pin the page backing @gfn. */
+int gzvm_vm_allocate_guest_page(struct gzvm *vm, struct gzvm_memslot *slot,
+				u64 gfn, u64 *pfn)
+{
+	u64 hva;	/* u64 to match gzvm_gfn_to_hva_memslot()'s out-param */
+
if (gzvm_gfn_to_pfn_memslot(slot, gfn, pfn) != 0)
return -EFAULT;
- return 0;
+
+	if (gzvm_gfn_to_hva_memslot(slot, gfn, &hva) != 0)
+		return -EINVAL;
+	return pin_one_page(vm, (unsigned long)hva, PFN_PHYS(gfn));
}
static int handle_block_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
@@ -138,7 +215,7 @@ static int handle_block_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
mutex_lock(&vm->demand_paging_lock);
for (i = 0, __gfn = start_gfn; i < nr_entries; i++, __gfn++) {
- ret = gzvm_vm_allocate_guest_page(memslot, __gfn, &pfn);
+ ret = gzvm_vm_allocate_guest_page(vm, memslot, __gfn, &pfn);
if (unlikely(ret)) {
ret = -ERR_FAULT;
goto err_unlock;
@@ -164,15 +241,14 @@ static int handle_single_demand_page(struct gzvm *vm, int memslot_id, u64 gfn)
int ret;
u64 pfn;
- ret = gzvm_vm_allocate_guest_page(&vm->memslot[memslot_id], gfn, &pfn);
+ ret = gzvm_vm_allocate_guest_page(vm, &vm->memslot[memslot_id], gfn, &pfn);
if (unlikely(ret))
return -EFAULT;
ret = gzvm_arch_map_guest(vm->vm_id, memslot_id, pfn, gfn, 1);
if (unlikely(ret))
return -EFAULT;
-
- return 0;
+ return ret;
}
/**
@@ -299,6 +299,22 @@ static long gzvm_vm_ioctl(struct file *filp, unsigned int ioctl,
return ret;
}
+/* Unpin and free every tracked pinned page; the caller handles locking. */
+static void gzvm_destroy_all_ppage(struct gzvm *gzvm)
+{
+	struct gzvm_pinned_page *entry;
+	struct rb_node *cur, *next;
+
+	for (cur = rb_first(&gzvm->pinned_pages); cur; cur = next) {
+		entry = rb_entry(cur, struct gzvm_pinned_page, node);
+		/* Fetch the successor before @cur is erased and freed. */
+		next = rb_next(cur);
+		unpin_user_pages_dirty_lock(&entry->page, 1, true);
+		rb_erase(cur, &gzvm->pinned_pages);
+		kfree(entry);
+	}
+}
+
static void gzvm_destroy_vm(struct gzvm *gzvm)
{
size_t allocated_size;
@@ -322,6 +338,9 @@ static void gzvm_destroy_vm(struct gzvm *gzvm)
mutex_unlock(&gzvm->lock);
+	/* No need to lock here because it's single-threaded execution */
+ gzvm_destroy_all_ppage(gzvm);
+
kfree(gzvm);
}
@@ -415,6 +434,8 @@ static struct gzvm *gzvm_create_vm(unsigned long vm_type)
gzvm->vm_id = ret;
gzvm->mm = current->mm;
mutex_init(&gzvm->lock);
+ mutex_init(&gzvm->mem_lock);
+ gzvm->pinned_pages = RB_ROOT;
ret = gzvm_vm_irqfd_init(gzvm);
if (ret) {
@@ -12,6 +12,7 @@
#include <linux/mutex.h>
#include <linux/gzvm.h>
#include <linux/srcu.h>
+#include <linux/rbtree.h>
/*
* For the normal physical address, the highest 12 bits should be zero, so we
@@ -99,6 +100,12 @@ struct gzvm_vcpu {
struct gzvm_vcpu_hwstate *hwstate;
};
+struct gzvm_pinned_page {
+	struct rb_node node;	/* link in the owning gzvm's pinned_pages tree */
+	struct page *page;	/* page obtained from pin_user_pages() */
+	u64 ipa;		/* tree key; set from the guest physical address */
+};
+
/**
* struct gzvm: the following data structures are for data transferring between
* driver and hypervisor, and they're aligned with hypervisor definitions.
@@ -119,6 +126,8 @@ struct gzvm_vcpu {
* @demand_page_buffer: the mailbox for transferring large portion pages
* @demand_paging_lock: lock for preventing multiple cpu using the same demand
* page mailbox at the same time
+ * @pinned_pages: rb-tree of the pages pinned for this VM, keyed by ipa
+ * @mem_lock: lock for memory operations
*/
struct gzvm {
struct gzvm_vcpu *vcpus[GZVM_MAX_VCPUS];
@@ -146,6 +155,9 @@ struct gzvm {
u32 demand_page_gran;
u64 *demand_page_buffer;
struct mutex demand_paging_lock;
+
+ struct rb_root pinned_pages;
+ struct mutex mem_lock;
};
long gzvm_dev_ioctl_check_extension(struct gzvm *gzvm, unsigned long args);
@@ -178,7 +190,8 @@ int gzvm_gfn_to_pfn_memslot(struct gzvm_memslot *memslot, u64 gfn, u64 *pfn);
int gzvm_gfn_to_hva_memslot(struct gzvm_memslot *memslot, u64 gfn,
u64 *hva_memslot);
int gzvm_vm_populate_mem_region(struct gzvm *gzvm, int slot_id);
-int gzvm_vm_allocate_guest_page(struct gzvm_memslot *slot, u64 gfn, u64 *pfn);
+int gzvm_vm_allocate_guest_page(struct gzvm *gzvm, struct gzvm_memslot *slot,
+ u64 gfn, u64 *pfn);
int gzvm_vm_ioctl_create_vcpu(struct gzvm *gzvm, u32 cpuid);
int gzvm_arch_vcpu_update_one_reg(struct gzvm_vcpu *vcpu, __u64 reg_id,