@@ -103,5 +103,6 @@
#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
#define PID_FS_MAGIC 0x50494446 /* "PIDF" */
+#define KVM_GUEST_MEM_MAGIC 0x474d454d /* "GMEM" */
#endif /* __LINUX_MAGIC_H__ */
@@ -4,9 +4,74 @@
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
#include <linux/anon_inodes.h>
+#include <linux/pseudo_fs.h>
#include "kvm_mm.h"
+/* A private pseudo-filesystem is set up solely so that ->evict_inode() can free the per-inode data. */
+
+static struct vfsmount *kvm_gmem_mnt __read_mostly;
+
+/*
+ * ->evict_inode() for guest_memfd inodes: tear down the page cache, mark
+ * the inode clear, and only then free the data hanging off i_private.
+ */
+static void gmem_evict_inode(struct inode *inode)
+{
+	/*
+	 * Truncate before freeing i_private so that nothing running as part
+	 * of page cache teardown can observe a dangling pointer.
+	 */
+	truncate_inode_pages_final(&inode->i_data);
+	clear_inode(inode);
+	kvfree(inode->i_private);
+}
+
+/* Superblock operations used by the guest_memfd pseudo-filesystem. */
+static const struct super_operations gmem_super_operations = {
+	.statfs		= simple_statfs,
+	.evict_inode	= gmem_evict_inode,
+	.drop_inode	= generic_delete_inode,
+};
+
+/*
+ * Set up @fc as a pseudo-filesystem context tagged with KVM_GUEST_MEM_MAGIC
+ * and install gmem_super_operations on it.
+ *
+ * Returns 0 on success, -ENOMEM if init_pseudo() returns NULL.
+ */
+static int gmem_init_fs_context(struct fs_context *fc)
+{
+	struct pseudo_fs_context *pfc;
+
+	pfc = init_pseudo(fc, KVM_GUEST_MEM_MAGIC);
+	if (pfc == NULL)
+		return -ENOMEM;
+
+	pfc->ops = &gmem_super_operations;
+	return 0;
+}
+
+/* Filesystem type backing guest_memfd inodes; mounted with kern_mount() in kvm_gmem_init(). */
+static struct file_system_type kvm_gmem_fs_type = {
+	.name			= "kvm_gmemfs",
+	.kill_sb		= kill_anon_super,
+	.init_fs_context	= gmem_init_fs_context,
+};
+
+/*
+ * Allocate an anonymous inode on the guest_memfd mount and wrap it in an
+ * O_RDWR pseudo file named @name that uses @fops.
+ *
+ * A reference on @fops->owner is taken up front and dropped again if
+ * either allocation fails.  Returns the new file or an ERR_PTR().
+ */
+static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops)
+{
+	struct file *filp;
+	struct inode *ino;
+
+	if (fops->owner && !try_module_get(fops->owner))
+		return ERR_PTR(-ENOENT);
+
+	ino = alloc_anon_inode(kvm_gmem_mnt->mnt_sb);
+	if (IS_ERR(ino)) {
+		filp = ERR_CAST(ino);
+		goto out_put_module;
+	}
+
+	filp = alloc_file_pseudo(ino, kvm_gmem_mnt, name, O_RDWR, fops);
+	if (!IS_ERR(filp))
+		return filp;
+
+	/* No file was created, so drop the inode reference here. */
+	iput(ino);
+out_put_module:
+	module_put(fops->owner);
+	return filp;
+}
+
+
+/* Per-inode guest_memfd data; stored in inode->i_private and freed by gmem_evict_inode(). */
+struct kvm_gmem_inode {
+ unsigned long flags; /* flags value passed to __kvm_gmem_create() */
+};
+
struct kvm_gmem {
struct kvm *kvm;
struct xarray bindings;
@@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = {
.fallocate = kvm_gmem_fallocate,
};
-void kvm_gmem_init(struct module *module)
+/*
+ * Register the guest_memfd pseudo-filesystem, create its kernel-internal
+ * mount and record @module as the owner of kvm_gmem_fops.
+ *
+ * Returns 0 on success or a negative errno.  On failure the filesystem
+ * type is not left registered, so kvm_gmem_exit() must not be called.
+ */
+int kvm_gmem_init(struct module *module)
{
+	int ret;
+
+	ret = register_filesystem(&kvm_gmem_fs_type);
+	if (ret) {
+		pr_err("kvm-gmem: cannot register file system (%d)\n", ret);
+		return ret;
+	}
+
+	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type);
+	if (IS_ERR(kvm_gmem_mnt)) {
+		pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt));
+		ret = PTR_ERR(kvm_gmem_mnt);
+		/* Undo register_filesystem() so a failed init leaves nothing behind. */
+		unregister_filesystem(&kvm_gmem_fs_type);
+		return ret;
+	}
+
kvm_gmem_fops.owner = module;
+
+	return 0;
+}
+
+/* Undo kvm_gmem_init(): drop the internal mount, then unregister the filesystem type. */
+void kvm_gmem_exit(void)
+{
+ kern_unmount(kvm_gmem_mnt);
+ unregister_filesystem(&kvm_gmem_fs_type);
}
static int kvm_gmem_migrate_folio(struct address_space *mapping,
@@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = {
static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
{
- const char *anon_name = "[kvm-gmem]";
+ const char *gmem_name = "[kvm-gmem]";
+ struct kvm_gmem_inode *i_gmem;
struct kvm_gmem *gmem;
struct inode *inode;
struct file *file;
int fd, err;
+ i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL);
+ if (!i_gmem)
+ return -ENOMEM;
+ i_gmem->flags = flags;
+
fd = get_unused_fd_flags(0);
- if (fd < 0)
- return fd;
+ if (fd < 0) {
+ err = fd;
+ goto err_i_gmem;
+ }
gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
if (!gmem) {
@@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
goto err_fd;
}
- file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
- O_RDWR, NULL);
+ file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto err_gmem;
}
+ inode = file->f_inode;
+
+ file->f_mapping = inode->i_mapping;
+ file->private_data = gmem;
file->f_flags |= O_LARGEFILE;
- inode = file->f_inode;
- WARN_ON(file->f_mapping != inode->i_mapping);
-
- inode->i_private = (void *)(unsigned long)flags;
+ inode->i_private = i_gmem;
inode->i_op = &kvm_gmem_iops;
inode->i_mapping->a_ops = &kvm_gmem_aops;
inode->i_mode |= S_IFREG;
@@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
kfree(gmem);
err_fd:
put_unused_fd(fd);
+err_i_gmem:
+ kvfree(i_gmem);
return err;
}
@@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
if (WARN_ON_ONCE(r))
goto err_vfio;
- kvm_gmem_init(module);
+ r = kvm_gmem_init(module);
+ if (r)
+ goto err_gmem;
r = kvm_init_virtualization();
if (r)
@@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
err_register:
kvm_uninit_virtualization();
err_virt:
+ kvm_gmem_exit();
+err_gmem:
kvm_vfio_ops_exit();
err_vfio:
kvm_async_pf_deinit();
@@ -6556,6 +6560,7 @@ void kvm_exit(void)
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
+ kvm_gmem_exit();
kvm_vfio_ops_exit();
kvm_async_pf_deinit();
kvm_irqfd_exit();
@@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
#endif /* HAVE_KVM_PFNCACHE */
#ifdef CONFIG_KVM_PRIVATE_MEM
-void kvm_gmem_init(struct module *module);
+int kvm_gmem_init(struct module *module);
+void kvm_gmem_exit(void);
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned int fd, loff_t offset);
void kvm_gmem_unbind(struct kvm_memory_slot *slot);
#else
-static inline void kvm_gmem_init(struct module *module)
+static inline void kvm_gmem_exit(void) {}
+static inline int kvm_gmem_init(struct module *module)
{
-
+ return 0;
}
static inline int kvm_gmem_bind(struct kvm *kvm,
In preparation for removing the usage of the uptodate flag, reintroduce the gmem filesystem type. We need it in order to free the private inode information. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- include/uapi/linux/magic.h | 1 + virt/kvm/guest_memfd.c | 117 +++++++++++++++++++++++++++++++++---- virt/kvm/kvm_main.c | 7 ++- virt/kvm/kvm_mm.h | 8 ++- 4 files changed, 119 insertions(+), 14 deletions(-)