Message ID | 20231017232152.2605440-2-nphamcs@gmail.com (mailing list archive)
---|---
State | New
Series | workload-specific and memory pressure-driven zswap writeback
On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
>
> The list_lru interface is based on the assumption that objects are
> allocated on the correct node/memcg. This change introduces the
> ability to explicitly specify the NUMA node and memcg when adding and
> removing objects, so that users of list_lru can track the node/memcg
> of items outside of the list_lru itself, as in zswap, where
> allocations can be made by kswapd for data that's charged to a
> different cgroup.
>
> Signed-off-by: Nhat Pham <nphamcs@gmail.com>

I prefer what Johannes suggested, making list_lru_add() and friends
take in the memcg and nid, and adding list_lru_add_obj() (or similar)
and friends that assume the object is on the right node and memcg.
This is clearer and more explicit imo. I am not very familiar with
list_lrus though, so I'll leave this to folks who actually are.

> ---
>  include/linux/list_lru.h | 38 +++++++++++++++++++++++++++++++++++
>  mm/list_lru.c            | 43 +++++++++++++++++++++++++++++++++++-----
>  2 files changed, 76 insertions(+), 5 deletions(-)
>
> [... quoted patch hunks trimmed; the full diff appears below ...]
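For illustration only, here is a rough sketch of the interface shape
suggested above. The list_lru_add_obj() name and the exact signatures
are hypothetical at this point in the discussion, not part of the
posted patch:

/* Hypothetical: the explicit variant becomes the primary interface... */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
		  struct mem_cgroup *memcg);

/*
 * ...and a wrapper keeps today's behavior of deriving the node and
 * memcg from wherever the object itself was allocated.
 */
static inline bool list_lru_add_obj(struct list_lru *lru,
				    struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
		mem_cgroup_from_slab_obj(item) : NULL;

	return list_lru_add(lru, item, nid, memcg);
}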
On Wed, Oct 18, 2023 at 3:27 PM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Tue, Oct 17, 2023 at 4:21 PM Nhat Pham <nphamcs@gmail.com> wrote:
> >
> > The list_lru interface is based on the assumption that objects are
> > allocated on the correct node/memcg. This change introduces the
> > ability to explicitly specify the NUMA node and memcg when adding
> > and removing objects, so that users of list_lru can track the
> > node/memcg of items outside of the list_lru itself, as in zswap,
> > where allocations can be made by kswapd for data that's charged to
> > a different cgroup.
> >
> > Signed-off-by: Nhat Pham <nphamcs@gmail.com>
>
> I prefer what Johannes suggested, making list_lru_add() and friends
> take in the memcg and nid, and adding list_lru_add_obj() (or similar)
> and friends that assume the object is on the right node and memcg.
> This is clearer and more explicit imo. I am not very familiar with
> list_lrus though, so I'll leave this to folks who actually are.

Yeah, the original naming is... most unfortunate, to say the least :)
I created a new function to avoid renaming list_lru_add()'s usage
everywhere, but if the consensus is that everyone prefers
list_lru_add() to be the one taking memcg + nid (and the original
renamed to list_lru_add_obj()), I can go around fixing all of it :)
Seems like a separate endeavour though.

> > [... quoted patch hunks trimmed; the full diff appears below ...]
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index b35968ee9fb5..0f5f39cacbbb 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -89,6 +89,24 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
  */
 bool list_lru_add(struct list_lru *lru, struct list_head *item);
 
+/**
+ * __list_lru_add: add an element to a specific sublist.
+ * @list_lru: the lru pointer
+ * @item: the item to be added.
+ * @memcg: the cgroup of the sublist to add the item to.
+ * @nid: the node id of the sublist to add the item to.
+ *
+ * This function is similar to list_lru_add(), but it allows the caller to
+ * specify the sublist to which the item should be added. This can be useful
+ * when the list_head node is not necessarily in the same cgroup and NUMA node
+ * as the data it represents, such as zswap, where the list_head node could be
+ * from kswapd and the data from a different cgroup altogether.
+ *
+ * Return value: true if the list was updated, false otherwise
+ */
+bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg);
+
 /**
  * list_lru_del: delete an element to the lru list
  * @list_lru: the lru pointer
@@ -102,6 +120,18 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item);
  */
 bool list_lru_del(struct list_lru *lru, struct list_head *item);
 
+/**
+ * __list_lru_del: delete an element from a specific sublist.
+ * @list_lru: the lru pointer
+ * @item: the item to be deleted.
+ * @memcg: the cgroup of the sublist to delete the item from.
+ * @nid: the node id of the sublist to delete the item from.
+ *
+ * Return value: true if the list was updated, false otherwise.
+ */
+bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg);
+
 /**
  * list_lru_count_one: return the number of objects currently held by @lru
  * @lru: the lru pointer.
@@ -136,6 +166,14 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
 void list_lru_isolate(struct list_lru_one *list, struct list_head *item);
 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
 			   struct list_head *head);
+/*
+ * list_lru_putback: undo list_lru_isolate.
+ *
+ * Since we might have dropped the LRU lock in between, recompute list_lru_one
+ * from the node's id and memcg.
+ */
+void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
+		      struct mem_cgroup *memcg);
 
 typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
 		struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index a05e5bef3b40..63b75163c6ad 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -119,13 +119,22 @@ list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
 bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
+	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
+		mem_cgroup_from_slab_obj(item) : NULL;
+
+	return __list_lru_add(lru, item, nid, memcg);
+}
+EXPORT_SYMBOL_GPL(list_lru_add);
+
+bool __list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg)
+{
 	struct list_lru_node *nlru = &lru->node[nid];
-	struct mem_cgroup *memcg;
 	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
 	if (list_empty(item)) {
-		l = list_lru_from_kmem(lru, nid, item, &memcg);
+		l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
 		list_add_tail(item, &l->list);
 		/* Set shrinker bit if the first element was added */
 		if (!l->nr_items++)
@@ -138,17 +147,27 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 	spin_unlock(&nlru->lock);
 	return false;
 }
-EXPORT_SYMBOL_GPL(list_lru_add);
+EXPORT_SYMBOL_GPL(__list_lru_add);
 
 bool list_lru_del(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
+	struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ?
+		mem_cgroup_from_slab_obj(item) : NULL;
+
+	return __list_lru_del(lru, item, nid, memcg);
+}
+EXPORT_SYMBOL_GPL(list_lru_del);
+
+bool __list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
+		    struct mem_cgroup *memcg)
+{
 	struct list_lru_node *nlru = &lru->node[nid];
 	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
 	if (!list_empty(item)) {
-		l = list_lru_from_kmem(lru, nid, item, NULL);
+		l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
 		list_del_init(item);
 		l->nr_items--;
 		nlru->nr_items--;
@@ -158,7 +177,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
 	spin_unlock(&nlru->lock);
 	return false;
 }
-EXPORT_SYMBOL_GPL(list_lru_del);
+EXPORT_SYMBOL_GPL(__list_lru_del);
 
 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
 {
@@ -175,6 +194,20 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
 }
 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
 
+void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
+		      struct mem_cgroup *memcg)
+{
+	struct list_lru_one *list =
+		list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
+
+	if (list_empty(item)) {
+		list_add_tail(item, &list->list);
+		if (!list->nr_items++)
+			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
+	}
+}
+EXPORT_SYMBOL_GPL(list_lru_putback);
+
 unsigned long list_lru_count_one(struct list_lru *lru,
 				 int nid, struct mem_cgroup *memcg)
 {
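To make list_lru_putback()'s contract concrete, here is a minimal
sketch of an isolate/putback cycle in a walk callback, following the
usual list_lru walker rules; struct evict_arg and try_to_evict() are
invented for illustration and are not part of this patch:

struct evict_arg {		/* hypothetical cb_arg bundle */
	struct list_lru *lru;
	int nid;
	struct mem_cgroup *memcg;
};

static enum lru_status evict_one(struct list_head *item,
				 struct list_lru_one *l,
				 spinlock_t *lock, void *cb_arg)
{
	struct evict_arg *a = cb_arg;
	bool evicted;

	/* Take the item off its sublist while the LRU lock is held. */
	list_lru_isolate(l, item);
	spin_unlock(lock);

	/* Work that may sleep; try_to_evict() is a made-up helper. */
	evicted = try_to_evict(item);

	/*
	 * On failure, return the item to its sublist. Since the lock
	 * was dropped, list_lru_putback() recomputes the list_lru_one
	 * from nid + memcg instead of trusting a stale pointer.
	 */
	if (!evicted)
		list_lru_putback(a->lru, item, a->nid, a->memcg);

	/* The walker expects the lock re-held on *_RETRY returns. */
	spin_lock(lock);
	return evicted ? LRU_REMOVED_RETRY : LRU_RETRY;
}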
The list_lru interface is based on the assumption that objects are
allocated on the correct node/memcg. This change introduces the ability
to explicitly specify the NUMA node and memcg when adding and removing
objects, so that users of list_lru can track the node/memcg of items
outside of the list_lru itself, as in zswap, where allocations can be
made by kswapd for data that's charged to a different cgroup.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/list_lru.h | 38 +++++++++++++++++++++++++++++++++++
 mm/list_lru.c            | 43 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 76 insertions(+), 5 deletions(-)
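As a caller-side illustration of the distinction (a sketch only: entry
stands for some zswap-style object, and entry_to_nid() and
mem_cgroup_from_entry() are placeholder helpers, not functions from
this series):

/* Implicit: the node and memcg are derived from the page backing the
 * list_head, i.e. whoever allocated the entry, possibly kswapd. */
list_lru_add(lru, &entry->lru);

/* Explicit: the node and memcg describe the data the entry tracks. */
__list_lru_add(lru, &entry->lru,
	       entry_to_nid(entry),		/* placeholder */
	       mem_cgroup_from_entry(entry));	/* placeholder */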