diff mbox series

[RFC,05/18] pkernfs: add file mmap callback

Message ID 20240205120203.60312-6-jgowans@amazon.com (mailing list archive)
State New, archived
Headers show
Series Pkernfs: Support persistence for live update | expand

Commit Message

Gowans, James Feb. 5, 2024, 12:01 p.m. UTC
Make the file data useable to userspace by adding mmap. That's all that
QEMU needs for guest RAM, so that's all be bother implementing for now.

When mmaping the file the VMA is marked as PFNMAP to indicate that there
are no struct pages for the memory in this VMA. Remap_pfn_range() is
used to actually populate the page tables. All PTEs are pre-faulted into
the pgtables at mmap time so that the pgtables are useable when this
virtual address range is given to VFIO's MAP_DMA.
---
 fs/pkernfs/file.c    | 42 +++++++++++++++++++++++++++++++++++++++++-
 fs/pkernfs/pkernfs.c |  2 +-
 fs/pkernfs/pkernfs.h |  2 ++
 3 files changed, 44 insertions(+), 2 deletions(-)

Comments

Dave Chinner Feb. 5, 2024, 11:34 p.m. UTC | #1
On Mon, Feb 05, 2024 at 12:01:50PM +0000, James Gowans wrote:
> Make the file data useable to userspace by adding mmap. That's all that
> QEMU needs for guest RAM, so that's all be bother implementing for now.
> 
> When mmaping the file the VMA is marked as PFNMAP to indicate that there
> are no struct pages for the memory in this VMA. Remap_pfn_range() is
> used to actually populate the page tables. All PTEs are pre-faulted into
> the pgtables at mmap time so that the pgtables are useable when this
> virtual address range is given to VFIO's MAP_DMA.

And so what happens when this file is truncated whilst it is mmap()d
by an application? Ain't that just a great big UAF waiting to be
exploited?

-Dave.
diff mbox series

Patch

diff --git a/fs/pkernfs/file.c b/fs/pkernfs/file.c
index 27a637423178..844b6cc63840 100644
--- a/fs/pkernfs/file.c
+++ b/fs/pkernfs/file.c
@@ -1,6 +1,7 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include "pkernfs.h"
+#include <linux/mm.h>
 
 static int truncate(struct inode *inode, loff_t newsize)
 {
@@ -42,6 +43,45 @@  static int inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct
 	return 0;
 }
 
+/*
+ * To be able to use PFNMAP VMAs for VFIO DMA mapping we need the page tables
+ * populated with mappings. Pre-fault everything.
+ */
+static int mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int rc;
+	unsigned long *mappings_block;
+	struct pkernfs_inode *pkernfs_inode;
+
+	pkernfs_inode = pkernfs_get_persisted_inode(filp->f_inode->i_sb, filp->f_inode->i_ino);
+
+	mappings_block = (unsigned long *)pkernfs_addr_for_block(filp->f_inode->i_sb,
+			pkernfs_inode->mappings_block);
+
+	/* Remap-pfn-range will mark the range VM_IO */
+	for (unsigned long vma_addr_offset = vma->vm_start;
+			vma_addr_offset < vma->vm_end;
+			vma_addr_offset += PMD_SIZE) {
+		int block, mapped_block;
+
+		block = (vma_addr_offset - vma->vm_start) / PMD_SIZE;
+		mapped_block = *(mappings_block + block);
+		/*
+		 * It's wrong to use rempa_pfn_range; this will install PTE-level entries.
+		 * The whole point of 2 MiB allocs is to improve TLB perf!
+		 * We should use something like mm/huge_memory.c#insert_pfn_pmd
+		 * but that is currently static.
+		 * TODO: figure out the best way to install PMDs.
+		 */
+		rc = remap_pfn_range(vma,
+				vma_addr_offset,
+				(pkernfs_base >> PAGE_SHIFT) + (mapped_block * 512),
+				PMD_SIZE,
+				vma->vm_page_prot);
+	}
+	return 0;
+}
+
 const struct inode_operations pkernfs_file_inode_operations = {
 	.setattr = inode_setattr,
 	.getattr = simple_getattr,
@@ -49,5 +89,5 @@  const struct inode_operations pkernfs_file_inode_operations = {
 
 const struct file_operations pkernfs_file_fops = {
 	.owner = THIS_MODULE,
-	.iterate_shared = NULL,
+	.mmap = mmap,
 };
diff --git a/fs/pkernfs/pkernfs.c b/fs/pkernfs/pkernfs.c
index 199c2c648bca..f010c2d76c76 100644
--- a/fs/pkernfs/pkernfs.c
+++ b/fs/pkernfs/pkernfs.c
@@ -7,7 +7,7 @@ 
 #include <linux/fs_context.h>
 #include <linux/io.h>
 
-static phys_addr_t pkernfs_base, pkernfs_size;
+phys_addr_t pkernfs_base, pkernfs_size;
 void *pkernfs_mem;
 static const struct super_operations pkernfs_super_ops = { };
 
diff --git a/fs/pkernfs/pkernfs.h b/fs/pkernfs/pkernfs.h
index 8b4fee8c5b2e..1a7aa783a9be 100644
--- a/fs/pkernfs/pkernfs.h
+++ b/fs/pkernfs/pkernfs.h
@@ -6,6 +6,8 @@ 
 #define PKERNFS_FILENAME_LEN 255
 
 extern void *pkernfs_mem;
+/* Units of bytes */
+extern phys_addr_t pkernfs_base, pkernfs_size;
 
 struct pkernfs_sb {
 	unsigned long magic_number;