@@ -13,4 +13,26 @@ static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned long a)
}
#endif
+#ifdef CONFIG_MEMFD_OPS
+/*
+ * Callback table an owner registers against a memfd inode to be told
+ * about fallocate() activity on the backing store.
+ *
+ * @invalidate_page_range: pages in the given range are about to be
+ *	removed from the page cache (hole punch / truncate); the owner
+ *	must stop using them.
+ * @fallocate: pages in the given range have been populated.
+ * @get_owner: pin @owner before a callback runs; returning false makes
+ *	the caller skip the callback entirely.
+ * @put_owner: drop the pin taken by get_owner().
+ */
+struct memfd_falloc_notifier {
+ void (*invalidate_page_range)(struct inode *inode, void *owner,
+ pgoff_t start, pgoff_t end);
+ void (*fallocate)(struct inode *inode, void *owner,
+ pgoff_t start, pgoff_t end);
+ bool (*get_owner)(void *owner);
+ void (*put_owner)(void *owner);
+};
+
+/*
+ * Ops handed back by memfd_register_falloc_notifier(): translate a file
+ * offset to a locked, referenced pfn, and release it again.  get_lock_pfn
+ * returns a negative errno on failure, otherwise the pfn with *order set
+ * to the compound order of the backing page.
+ */
+struct memfd_pfn_ops {
+ long (*get_lock_pfn)(struct inode *inode, pgoff_t offset, int *order);
+ void (*put_unlock_pfn)(unsigned long pfn);
+
+};
+
+extern int memfd_register_falloc_notifier(struct inode *inode, void *owner,
+ const struct memfd_falloc_notifier *notifier,
+ const struct memfd_pfn_ops **pfn_ops);
+extern void memfd_unregister_falloc_notifier(struct inode *inode);
+#endif
+
#endif /* __LINUX_MEMFD_H */
@@ -12,6 +12,11 @@
/* inode in-kernel data */
+#ifdef CONFIG_MEMFD_OPS
+struct memfd_falloc_notifier;
+struct memfd_pfn_ops;
+#endif
+
struct shmem_inode_info {
spinlock_t lock;
unsigned int seals; /* shmem seals */
@@ -24,6 +29,10 @@ struct shmem_inode_info {
struct shared_policy policy; /* NUMA memory alloc policy */
struct simple_xattrs xattrs; /* list of xattrs */
atomic_t stop_eviction; /* hold when working on inode */
+#ifdef CONFIG_MEMFD_OPS
+ void *owner; /* opaque cookie of the registered notifier owner */
+ const struct memfd_falloc_notifier *falloc_notifier; /* set under lock */
+#endif
struct inode vfs_inode;
};
@@ -96,6 +105,13 @@ extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+#ifdef CONFIG_MEMFD_OPS
+/* shmem backends for the memfd fallocate-notifier API; see mm/shmem.c. */
+extern int shmem_register_falloc_notifier(struct inode *inode, void *owner,
+ const struct memfd_falloc_notifier *notifier,
+ const struct memfd_pfn_ops **pfn_ops);
+extern void shmem_unregister_falloc_notifier(struct inode *inode);
+#endif
+
/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
@@ -900,6 +900,10 @@ config IO_MAPPING
config SECRETMEM
def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+# Internal (non-user-selectable) symbol: has no prompt, so it can only be
+# enabled via "select" by subsystems that consume the memfd notifier ops.
+config MEMFD_OPS
+ bool
+ depends on MEMFD_CREATE
+
source "mm/damon/Kconfig"
endmenu
@@ -130,6 +130,27 @@ static unsigned int *memfd_file_seals_ptr(struct file *file)
return NULL;
}
+#ifdef CONFIG_MEMFD_OPS
+/**
+ * memfd_register_falloc_notifier - attach a fallocate notifier to a memfd
+ * @inode: inode of the memfd
+ * @owner: opaque cookie passed back to the notifier callbacks
+ * @notifier: callback table to register
+ * @pfn_ops: out parameter; ops for looking up / releasing backing pfns
+ *
+ * Only shmem-backed mappings are supported.
+ *
+ * Returns 0 on success, -EINVAL for non-shmem mappings, or whatever
+ * shmem_register_falloc_notifier() returns.
+ */
+int memfd_register_falloc_notifier(struct inode *inode, void *owner,
+ const struct memfd_falloc_notifier *notifier,
+ const struct memfd_pfn_ops **pfn_ops)
+{
+ if (shmem_mapping(inode->i_mapping))
+ return shmem_register_falloc_notifier(inode, owner,
+ notifier, pfn_ops);
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(memfd_register_falloc_notifier);
+
+/**
+ * memfd_unregister_falloc_notifier - detach any notifier from a memfd
+ * @inode: inode of the memfd
+ *
+ * No-op for non-shmem mappings.
+ */
+void memfd_unregister_falloc_notifier(struct inode *inode)
+{
+ if (shmem_mapping(inode->i_mapping))
+ shmem_unregister_falloc_notifier(inode);
+}
+EXPORT_SYMBOL_GPL(memfd_unregister_falloc_notifier);
+#endif
+
#define F_ALL_SEALS (F_SEAL_SEAL | \
F_SEAL_SHRINK | \
F_SEAL_GROW | \
@@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
#include <linux/userfaultfd_k.h>
#include <linux/rmap.h>
#include <linux/uuid.h>
+#include <linux/memfd.h>
#include <linux/uaccess.h>
@@ -906,6 +907,68 @@ static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
return split_huge_page(page) >= 0;
}
+/*
+ * Tell the registered owner (if any) that fallocate() populated pages in
+ * the given range of @inode.
+ *
+ * The notifier is re-read under info->lock and the owner is pinned with
+ * get_owner() before the lock is dropped, so a concurrent unregister
+ * cannot invalidate @owner while the callback runs outside the lock.
+ */
+static void notify_fallocate(struct inode *inode, pgoff_t start, pgoff_t end)
+{
+#ifdef CONFIG_MEMFD_OPS
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	const struct memfd_falloc_notifier *notifier;
+	void *owner;
+	bool ret;
+
+	/*
+	 * Lockless fast-path check: racy against unregister by design and
+	 * re-validated under info->lock below.  READ_ONCE() marks the
+	 * intentional data race and prevents load tearing.
+	 */
+	if (!READ_ONCE(info->falloc_notifier))
+		return;
+
+	spin_lock(&info->lock);
+	notifier = info->falloc_notifier;
+	if (!notifier) {
+		spin_unlock(&info->lock);
+		return;
+	}
+
+	owner = info->owner;
+	/* Pin the owner so it stays valid after we drop the lock. */
+	ret = notifier->get_owner(owner);
+	spin_unlock(&info->lock);
+	if (!ret)
+		return;
+
+	notifier->fallocate(inode, owner, start, end);
+	notifier->put_owner(owner);
+#endif
+}
+
+/*
+ * Tell the registered owner (if any) that @page is about to be removed
+ * from @inode's page cache (hole punch / truncate).  The notified range
+ * is the intersection of [start, end) with the span @page covers.
+ */
+static void notify_invalidate_page(struct inode *inode, struct page *page,
+				   pgoff_t start, pgoff_t end)
+{
+#ifdef CONFIG_MEMFD_OPS
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	const struct memfd_falloc_notifier *notifier;
+	void *owner;
+	bool ret;
+
+	/*
+	 * Lockless fast-path check: racy against unregister by design and
+	 * re-validated under info->lock below.  READ_ONCE() marks the
+	 * intentional data race and prevents load tearing.
+	 */
+	if (!READ_ONCE(info->falloc_notifier))
+		return;
+
+	spin_lock(&info->lock);
+	notifier = info->falloc_notifier;
+	if (!notifier) {
+		spin_unlock(&info->lock);
+		return;
+	}
+
+	owner = info->owner;
+	/* Pin the owner so it stays valid after we drop the lock. */
+	ret = notifier->get_owner(owner);
+	spin_unlock(&info->lock);
+	if (!ret)
+		return;
+
+	/* Clamp to the pages actually covered by this (possibly huge) page. */
+	start = max(start, page->index);
+	end = min(end, page->index + thp_nr_pages(page));
+
+	notifier->invalidate_page_range(inode, owner, start, end);
+	notifier->put_owner(owner);
+#endif
+}
+
/*
* Remove range of pages and swap entries from page cache, and free them.
* If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
@@ -949,6 +1012,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
index += thp_nr_pages(page) - 1;
+ notify_invalidate_page(inode, page, start, end);
+
if (!unfalloc || !PageUptodate(page))
truncate_inode_page(mapping, page);
unlock_page(page);
@@ -1025,6 +1090,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
index--;
break;
}
+
+ notify_invalidate_page(inode, page, start, end);
+
VM_BUG_ON_PAGE(PageWriteback(page), page);
if (shmem_punch_compound(page, start, end))
truncate_inode_page(mapping, page);
@@ -2815,6 +2883,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len);
inode->i_ctime = current_time(inode);
+ notify_fallocate(inode, start, end);
undone:
spin_lock(&inode->i_lock);
inode->i_private = NULL;
@@ -3784,6 +3853,20 @@ static void shmem_destroy_inodecache(void)
kmem_cache_destroy(shmem_inode_cachep);
}
+#ifdef CONFIG_MIGRATION
+/*
+ * Migration hook for shmem pages: refuse to migrate pages of an inode
+ * that has a memfd owner registered (its pfns may be handed out via
+ * shmem_pfn_ops, and the mapping's gfp mask was made non-movable at
+ * register time); otherwise fall back to the generic migrate_page().
+ */
+int shmem_migrate_page(struct address_space *mapping, struct page *newpage,
+		       struct page *page, enum migrate_mode mode)
+{
+#ifdef CONFIG_MEMFD_OPS
+	struct inode *inode = mapping->host;
+
+	/*
+	 * Lockless read of a field written under info->lock elsewhere;
+	 * READ_ONCE() marks the data race and prevents load tearing.
+	 */
+	if (READ_ONCE(SHMEM_I(inode)->owner))
+		return -EOPNOTSUPP;
+#endif
+	return migrate_page(mapping, newpage, page, mode);
+}
+#endif
+
const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.set_page_dirty = __set_page_dirty_no_writeback,
@@ -3798,6 +3881,81 @@ const struct address_space_operations shmem_aops = {
};
EXPORT_SYMBOL(shmem_aops);
+#ifdef CONFIG_MEMFD_OPS
+/*
+ * Look up the page backing @offset in @inode without allocating
+ * (SGP_NOALLOC) and return its pfn; the page is left locked with a
+ * reference held (released via shmem_put_unlock_pfn()).  *order is set
+ * to the compound order of the containing page (0 for a base page).
+ *
+ * NOTE(review): for a THP subpage this returns the subpage's pfn while
+ * *order describes the head page — confirm callers expect that pairing.
+ *
+ * Returns a negative errno from shmem_getpage() on failure.
+ */
+static long shmem_get_lock_pfn(struct inode *inode, pgoff_t offset, int *order)
+{
+ struct page *page;
+ int ret;
+
+ ret = shmem_getpage(inode, offset, &page, SGP_NOALLOC);
+ if (ret)
+ return ret;
+
+ *order = thp_order(compound_head(page));
+
+ return page_to_pfn(page);
+}
+
+/*
+ * Counterpart of shmem_get_lock_pfn(): mark the page dirty (its contents
+ * may have been modified through the pfn), then drop the page lock and
+ * the reference taken at lookup time.
+ */
+static void shmem_put_unlock_pfn(unsigned long pfn)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+ set_page_dirty(page);
+ unlock_page(page);
+ put_page(page);
+}
+
+/* Handed out to notifier owners via shmem_register_falloc_notifier(). */
+static const struct memfd_pfn_ops shmem_pfn_ops = {
+ .get_lock_pfn = shmem_get_lock_pfn,
+ .put_unlock_pfn = shmem_put_unlock_pfn,
+};
+
+/*
+ * Register @notifier/@owner on a shmem @inode and hand back the pfn ops.
+ *
+ * Only one owner may be registered per inode (-EPERM otherwise); the
+ * same owner may register again, which refreshes the notifier pointer.
+ *
+ * Side effects: the mapping is made unevictable and __GFP_MOVABLE is
+ * cleared from its gfp mask so backing pages are not migrated while
+ * handed out via the pfn ops (see shmem_migrate_page()).
+ */
+int shmem_register_falloc_notifier(struct inode *inode, void *owner,
+ const struct memfd_falloc_notifier *notifier,
+ const struct memfd_pfn_ops **pfn_ops)
+{
+	struct shmem_inode_info *info;
+	gfp_t gfp;
+
+	/*
+	 * Validate everything — including @inode — before deriving @info:
+	 * SHMEM_I() is container_of() arithmetic and must not be applied
+	 * to a NULL pointer.
+	 */
+	if (!inode || !owner || !notifier || !pfn_ops ||
+	    !notifier->invalidate_page_range ||
+	    !notifier->fallocate ||
+	    !notifier->get_owner ||
+	    !notifier->put_owner)
+		return -EINVAL;
+
+	info = SHMEM_I(inode);
+
+	spin_lock(&info->lock);
+	if (info->owner && info->owner != owner) {
+		spin_unlock(&info->lock);
+		return -EPERM;
+	}
+
+	info->owner = owner;
+	info->falloc_notifier = notifier;
+	spin_unlock(&info->lock);
+
+	/* Pin pages in place: no migration, no eviction. */
+	gfp = mapping_gfp_mask(inode->i_mapping);
+	gfp &= ~__GFP_MOVABLE;
+	mapping_set_gfp_mask(inode->i_mapping, gfp);
+	mapping_set_unevictable(inode->i_mapping);
+
+	*pfn_ops = &shmem_pfn_ops;
+	return 0;
+}
+
+/*
+ * Remove any registered notifier/owner from @inode.
+ *
+ * NOTE(review): the gfp-mask (__GFP_MOVABLE) and unevictable changes made
+ * by shmem_register_falloc_notifier() are not reverted here — confirm
+ * that is intentional.
+ */
+void shmem_unregister_falloc_notifier(struct inode *inode)
+{
+ struct shmem_inode_info *info = SHMEM_I(inode);
+
+ spin_lock(&info->lock);
+ info->owner = NULL;
+ info->falloc_notifier = NULL;
+ spin_unlock(&info->lock);
+}
+#endif
+
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
.get_unmapped_area = shmem_get_unmapped_area,