Message ID | 20220310140911.50924-3-chao.p.peng@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: mm: fd-based approach for supporting KVM guest private memory | expand |
On Thu, Mar 10, 2022, Chao Peng wrote: > diff --git a/mm/Makefile b/mm/Makefile > index 70d4309c9ce3..f628256dce0d 100644 > +void memfile_notifier_invalidate(struct memfile_notifier_list *list, > + pgoff_t start, pgoff_t end) > +{ > + struct memfile_notifier *notifier; > + int id; > + > + id = srcu_read_lock(&srcu); > + list_for_each_entry_srcu(notifier, &list->head, list, > + srcu_read_lock_held(&srcu)) { > + if (notifier->ops && notifier->ops->invalidate) Any reason notifier->ops isn't mandatory? > + notifier->ops->invalidate(notifier, start, end); > + } > + srcu_read_unlock(&srcu, id); > +} > + > +void memfile_notifier_fallocate(struct memfile_notifier_list *list, > + pgoff_t start, pgoff_t end) > +{ > + struct memfile_notifier *notifier; > + int id; > + > + id = srcu_read_lock(&srcu); > + list_for_each_entry_srcu(notifier, &list->head, list, > + srcu_read_lock_held(&srcu)) { > + if (notifier->ops && notifier->ops->fallocate) > + notifier->ops->fallocate(notifier, start, end); > + } > + srcu_read_unlock(&srcu, id); > +} > + > +void memfile_register_backing_store(struct memfile_backing_store *bs) > +{ > + BUG_ON(!bs || !bs->get_notifier_list); > + > + list_add_tail(&bs->list, &backing_store_list); > +} > + > +void memfile_unregister_backing_store(struct memfile_backing_store *bs) > +{ > + list_del(&bs->list); Allowing unregistration of a backing store is broken. Using the _safe() variant is not sufficient to guard against concurrent modification. I don't see any reason to support this out of the gate, the only reason to support unregistering a backing store is if the backing store is implemented as a module, and AFAIK none of the backing stores we plan on supporting initially support being built as a module. These aren't exported, so it's not like that's even possible. Registration would also be broken if modules are allowed, I'm pretty sure module init doesn't run under a global lock. 
We can always add this complexity if it's needed in the future, but for now the easiest thing would be to tag memfile_register_backing_store() with __init and make backing_store_list __ro_after_init. > +} > + > +static int memfile_get_notifier_info(struct inode *inode, > + struct memfile_notifier_list **list, > + struct memfile_pfn_ops **ops) > +{ > + struct memfile_backing_store *bs, *iter; > + struct memfile_notifier_list *tmp; > + > + list_for_each_entry_safe(bs, iter, &backing_store_list, list) { > + tmp = bs->get_notifier_list(inode); > + if (tmp) { > + *list = tmp; > + if (ops) > + *ops = &bs->pfn_ops; > + return 0; > + } > + } > + return -EOPNOTSUPP; > +} > + > +int memfile_register_notifier(struct inode *inode, Taking an inode is a bit odd from a user perspective. Any reason not to take a "struct file *" and get the inode here? That would give callers a hint that they need to hold a reference to the file for the lifetime of the registration. > + struct memfile_notifier *notifier, > + struct memfile_pfn_ops **pfn_ops) > +{ > + struct memfile_notifier_list *list; > + int ret; > + > + if (!inode || !notifier | !pfn_ops) Bitwise | instead of logical ||. But IMO taking in a pfn_ops pointer is silly. More below. > + return -EINVAL; > + > + ret = memfile_get_notifier_info(inode, &list, pfn_ops); > + if (ret) > + return ret; > + > + spin_lock(&list->lock); > + list_add_rcu(&notifier->list, &list->head); > + spin_unlock(&list->lock); > + > + return 0; > +} > +EXPORT_SYMBOL_GPL(memfile_register_notifier); > + > +void memfile_unregister_notifier(struct inode *inode, > + struct memfile_notifier *notifier) > +{ > + struct memfile_notifier_list *list; > + > + if (!inode || !notifier) > + return; > + > + BUG_ON(memfile_get_notifier_info(inode, &list, NULL)); Eww. Rather than force the caller to provide the inode/file and the notifier, what about grabbing the backing store itself in the notifier?
struct memfile_notifier { struct list_head list; struct memfile_notifier_ops *ops; struct memfile_backing_store *bs; }; That also helps avoid confusing between "ops" and "pfn_ops". IMO, exposing memfile_backing_store to the caller isn't a big deal, and is preferable to having to rewalk multiple lists just to delete a notifier. Then this can become: void memfile_unregister_notifier(struct memfile_notifier *notifier) { spin_lock(&notifier->bs->list->lock); list_del_rcu(&notifier->list); spin_unlock(&notifier->bs->list->lock); synchronize_srcu(&srcu); } and registration can be: int memfile_register_notifier(const struct file *file, struct memfile_notifier *notifier) { struct memfile_notifier_list *list; struct memfile_backing_store *bs; int ret; if (!file || !notifier) return -EINVAL; list_for_each_entry(bs, &backing_store_list, list) { list = bs->get_notifier_list(file_inode(file)); if (list) { notifier->bs = bs; spin_lock(&list->lock); list_add_rcu(&notifier->list, &list->head); spin_unlock(&list->lock); return 0; } } return -EOPNOTSUPP; }
On Tue, Mar 29, 2022 at 06:45:16PM +0000, Sean Christopherson wrote: > On Thu, Mar 10, 2022, Chao Peng wrote: > > diff --git a/mm/Makefile b/mm/Makefile > > index 70d4309c9ce3..f628256dce0d 100644 > > +void memfile_notifier_invalidate(struct memfile_notifier_list *list, > > + pgoff_t start, pgoff_t end) > > +{ > > + struct memfile_notifier *notifier; > > + int id; > > + > > + id = srcu_read_lock(&srcu); > > + list_for_each_entry_srcu(notifier, &list->head, list, > > + srcu_read_lock_held(&srcu)) { > > + if (notifier->ops && notifier->ops->invalidate) > > Any reason notifier->ops isn't mandatory? Yes it's mandatory, will skip the check here. > > > + notifier->ops->invalidate(notifier, start, end); > > + } > > + srcu_read_unlock(&srcu, id); > > +} > > + > > +void memfile_notifier_fallocate(struct memfile_notifier_list *list, > > + pgoff_t start, pgoff_t end) > > +{ > > + struct memfile_notifier *notifier; > > + int id; > > + > > + id = srcu_read_lock(&srcu); > > + list_for_each_entry_srcu(notifier, &list->head, list, > > + srcu_read_lock_held(&srcu)) { > > + if (notifier->ops && notifier->ops->fallocate) > > + notifier->ops->fallocate(notifier, start, end); > > + } > > + srcu_read_unlock(&srcu, id); > > +} > > + > > +void memfile_register_backing_store(struct memfile_backing_store *bs) > > +{ > > + BUG_ON(!bs || !bs->get_notifier_list); > > + > > + list_add_tail(&bs->list, &backing_store_list); > > +} > > + > > +void memfile_unregister_backing_store(struct memfile_backing_store *bs) > > +{ > > + list_del(&bs->list); > > Allowing unregistration of a backing store is broken. Using the _safe() variant > is not sufficient to guard against concurrent modification. I don't see any reason > to support this out of the gate, the only reason to support unregistering a backing > store is if the backing store is implemented as a module, and AFAIK none of the > backing stores we plan on supporting initially support being built as a module. 
> These aren't exported, so it's not like that's even possible. Registration would > also be broken if modules are allowed, I'm pretty sure module init doesn't run > under a global lock. > > We can always add this complexity if it's needed in the future, but for now the > easiest thing would be to tag memfile_register_backing_store() with __init and > make backing_store_list __ro_after_init. The only currently supported backing store shmem does not need this so can remove it for now. > > > +} > > + > > +static int memfile_get_notifier_info(struct inode *inode, > > + struct memfile_notifier_list **list, > > + struct memfile_pfn_ops **ops) > > +{ > > + struct memfile_backing_store *bs, *iter; > > + struct memfile_notifier_list *tmp; > > + > > + list_for_each_entry_safe(bs, iter, &backing_store_list, list) { > > + tmp = bs->get_notifier_list(inode); > > + if (tmp) { > > + *list = tmp; > > + if (ops) > > + *ops = &bs->pfn_ops; > > + return 0; > > + } > > + } > > + return -EOPNOTSUPP; > > +} > > + > > +int memfile_register_notifier(struct inode *inode, > > Taking an inode is a bit odd from a user perspective. Any reason not to take a > "struct file *" and get the inode here? That would give callers a hint that they > need to hold a reference to the file for the lifetime of the registration. Yes, I can change. > > > + struct memfile_notifier *notifier, > > + struct memfile_pfn_ops **pfn_ops) > > +{ > > + struct memfile_notifier_list *list; > > + int ret; > > + > > + if (!inode || !notifier | !pfn_ops) > > Bitwise | instead of logical ||. But IMO taking in a pfn_ops pointer is silly. > More below. 
> > > + return -EINVAL; > > + > > + ret = memfile_get_notifier_info(inode, &list, pfn_ops); > > + if (ret) > > + return ret; > > + > > + spin_lock(&list->lock); > > + list_add_rcu(&notifier->list, &list->head); > > + spin_unlock(&list->lock); > > + > > + return 0; > > +} > > +EXPORT_SYMBOL_GPL(memfile_register_notifier); > > + > > +void memfile_unregister_notifier(struct inode *inode, > > + struct memfile_notifier *notifier) > > +{ > > + struct memfile_notifier_list *list; > > + > > + if (!inode || !notifier) > > + return; > > + > > + BUG_ON(memfile_get_notifier_info(inode, &list, NULL)); > > Eww. Rather than force the caller to provide the inode/file and the notifier, > what about grabbing the backing store itself in the notifier? > > struct memfile_notifier { > struct list_head list; > struct memfile_notifier_ops *ops; > > struct memfile_backing_store *bs; > }; > > That also helps avoid confusing between "ops" and "pfn_ops". IMO, exposing > memfile_backing_store to the caller isn't a big deal, and is preferable to having > to rewalk multiple lists just to delete a notifier. Agreed, good suggestion. > > Then this can become: > > void memfile_unregister_notifier(struct memfile_notifier *notifier) > { > spin_lock(&notifier->bs->list->lock); > list_del_rcu(&notifier->list); > spin_unlock(&notifier->bs->list->lock); > > synchronize_srcu(&srcu); > } > > and registration can be: > > int memfile_register_notifier(const struct file *file, > struct memfile_notifier *notifier) > { > struct memfile_notifier_list *list; > struct memfile_backing_store *bs; > int ret; > > if (!file || !notifier) > return -EINVAL; > > list_for_each_entry(bs, &backing_store_list, list) { > list = bs->get_notifier_list(file_inode(file)); > if (list) { > notifier->bs = bs; > > spin_lock(&list->lock); > list_add_rcu(&notifier->list, &list->head); > spin_unlock(&list->lock); > return 0; > } > } > > return -EOPNOTSUPP; > }
diff --git a/include/linux/memfile_notifier.h b/include/linux/memfile_notifier.h new file mode 100644 index 000000000000..e8d400558adb --- /dev/null +++ b/include/linux/memfile_notifier.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MEMFILE_NOTIFIER_H +#define _LINUX_MEMFILE_NOTIFIER_H + +#include <linux/rculist.h> +#include <linux/spinlock.h> +#include <linux/srcu.h> +#include <linux/fs.h> + +struct memfile_notifier; + +struct memfile_notifier_ops { + void (*invalidate)(struct memfile_notifier *notifier, + pgoff_t start, pgoff_t end); + void (*fallocate)(struct memfile_notifier *notifier, + pgoff_t start, pgoff_t end); +}; + +struct memfile_pfn_ops { + long (*get_lock_pfn)(struct inode *inode, pgoff_t offset, int *order); + void (*put_unlock_pfn)(unsigned long pfn); +}; + +struct memfile_notifier { + struct list_head list; + struct memfile_notifier_ops *ops; +}; + +struct memfile_notifier_list { + struct list_head head; + spinlock_t lock; +}; + +struct memfile_backing_store { + struct list_head list; + struct memfile_pfn_ops pfn_ops; + struct memfile_notifier_list* (*get_notifier_list)(struct inode *inode); +}; + +#ifdef CONFIG_MEMFILE_NOTIFIER +/* APIs for backing stores */ +static inline void memfile_notifier_list_init(struct memfile_notifier_list *list) +{ + INIT_LIST_HEAD(&list->head); + spin_lock_init(&list->lock); +} + +extern void memfile_notifier_invalidate(struct memfile_notifier_list *list, + pgoff_t start, pgoff_t end); +extern void memfile_notifier_fallocate(struct memfile_notifier_list *list, + pgoff_t start, pgoff_t end); +extern void memfile_register_backing_store(struct memfile_backing_store *bs); +extern void memfile_unregister_backing_store(struct memfile_backing_store *bs); + +/*APIs for notifier consumers */ +extern int memfile_register_notifier(struct inode *inode, + struct memfile_notifier *notifier, + struct memfile_pfn_ops **pfn_ops); +extern void memfile_unregister_notifier(struct inode *inode, + struct 
memfile_notifier *notifier); + +#endif /* CONFIG_MEMFILE_NOTIFIER */ + +#endif /* _LINUX_MEMFILE_NOTIFIER_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 3326ee3903f3..7c6b1ad3dade 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -892,6 +892,10 @@ config ANON_VMA_NAME area from being merged with adjacent virtual memory areas due to the difference in their name. +config MEMFILE_NOTIFIER + bool + select SRCU + source "mm/damon/Kconfig" endmenu diff --git a/mm/Makefile b/mm/Makefile index 70d4309c9ce3..f628256dce0d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -132,3 +132,4 @@ obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o +obj-$(CONFIG_MEMFILE_NOTIFIER) += memfile_notifier.o diff --git a/mm/memfile_notifier.c b/mm/memfile_notifier.c new file mode 100644 index 000000000000..a405db56fde2 --- /dev/null +++ b/mm/memfile_notifier.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/mm/memfile_notifier.c + * + * Copyright (C) 2022 Intel Corporation. 
+ * Chao Peng <chao.p.peng@linux.intel.com> + */ + +#include <linux/memfile_notifier.h> +#include <linux/srcu.h> + +DEFINE_STATIC_SRCU(srcu); +static LIST_HEAD(backing_store_list); + +void memfile_notifier_invalidate(struct memfile_notifier_list *list, + pgoff_t start, pgoff_t end) +{ + struct memfile_notifier *notifier; + int id; + + id = srcu_read_lock(&srcu); + list_for_each_entry_srcu(notifier, &list->head, list, + srcu_read_lock_held(&srcu)) { + if (notifier->ops && notifier->ops->invalidate) + notifier->ops->invalidate(notifier, start, end); + } + srcu_read_unlock(&srcu, id); +} + +void memfile_notifier_fallocate(struct memfile_notifier_list *list, + pgoff_t start, pgoff_t end) +{ + struct memfile_notifier *notifier; + int id; + + id = srcu_read_lock(&srcu); + list_for_each_entry_srcu(notifier, &list->head, list, + srcu_read_lock_held(&srcu)) { + if (notifier->ops && notifier->ops->fallocate) + notifier->ops->fallocate(notifier, start, end); + } + srcu_read_unlock(&srcu, id); +} + +void memfile_register_backing_store(struct memfile_backing_store *bs) +{ + BUG_ON(!bs || !bs->get_notifier_list); + + list_add_tail(&bs->list, &backing_store_list); +} + +void memfile_unregister_backing_store(struct memfile_backing_store *bs) +{ + list_del(&bs->list); +} + +static int memfile_get_notifier_info(struct inode *inode, + struct memfile_notifier_list **list, + struct memfile_pfn_ops **ops) +{ + struct memfile_backing_store *bs, *iter; + struct memfile_notifier_list *tmp; + + list_for_each_entry_safe(bs, iter, &backing_store_list, list) { + tmp = bs->get_notifier_list(inode); + if (tmp) { + *list = tmp; + if (ops) + *ops = &bs->pfn_ops; + return 0; + } + } + return -EOPNOTSUPP; +} + +int memfile_register_notifier(struct inode *inode, + struct memfile_notifier *notifier, + struct memfile_pfn_ops **pfn_ops) +{ + struct memfile_notifier_list *list; + int ret; + + if (!inode || !notifier | !pfn_ops) + return -EINVAL; + + ret = memfile_get_notifier_info(inode, &list, pfn_ops); 
+ if (ret) + return ret; + + spin_lock(&list->lock); + list_add_rcu(&notifier->list, &list->head); + spin_unlock(&list->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(memfile_register_notifier); + +void memfile_unregister_notifier(struct inode *inode, + struct memfile_notifier *notifier) +{ + struct memfile_notifier_list *list; + + if (!inode || !notifier) + return; + + BUG_ON(memfile_get_notifier_info(inode, &list, NULL)); + + spin_lock(&list->lock); + list_del_rcu(&notifier->list); + spin_unlock(&list->lock); + + synchronize_srcu(&srcu); +} +EXPORT_SYMBOL_GPL(memfile_unregister_notifier);