| Message ID | 20230223132725.11685-2-zhengqi.arch@bytedance.com (mailing list archive) |
|---|---|
| State | New |
| Series | make slab shrink lockless |
On 2023/2/23 21:27, Qi Zheng wrote:
> To prepare for the subsequent lockless memcg slab shrink,
> add a map_nr_max field to struct shrinker_info to record
> its own real shrinker_nr_max.
>
> No functional changes.
>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

I missed a Suggested-by here. Hi Kirill, can I add it?

Suggested-by: Kirill Tkhai <tkhai@ya.ru>

> ---
>  include/linux/memcontrol.h |  1 +
>  mm/vmscan.c                | 29 ++++++++++++++++++-----------
>  2 files changed, 19 insertions(+), 11 deletions(-)

[quoted patch body snipped; the full diff appears at the bottom of this page]
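For readers unfamiliar with the single-allocation layout this patch touches, here is a minimal userspace sketch (hypothetical names and simplifications, not code from the thread) of how the nr_deferred array and the map bitmap trail the struct, which is why map_nr_max naturally lives alongside them:

```c
#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical userspace sketch, not kernel code: it mirrors the layout
 * that alloc_shrinker_info()/expand_one_shrinker_info() build, where the
 * nr_deferred array and the map bitmap live in the same allocation as
 * the struct.  map_nr_max records how many shrinker ids the trailing
 * map was sized for.
 */
#define BITS_PER_LONG (8 * sizeof(unsigned long))

struct shrinker_info {
	long *nr_deferred;		/* atomic_long_t in the kernel */
	unsigned long *map;
	int map_nr_max;
};

static struct shrinker_info *alloc_info(int nr_max)
{
	size_t defer_size = nr_max * sizeof(long);
	size_t map_size = (nr_max + BITS_PER_LONG - 1) / BITS_PER_LONG *
			  sizeof(unsigned long);
	struct shrinker_info *info;

	info = calloc(1, sizeof(*info) + defer_size + map_size);
	if (!info)
		return NULL;

	/* Both arrays sit directly behind the struct, as in mm/vmscan.c
	 * (the kernel expresses the same with void-pointer arithmetic). */
	info->nr_deferred = (long *)(info + 1);
	info->map = (unsigned long *)((char *)info->nr_deferred + defer_size);
	info->map_nr_max = nr_max;
	return info;
}

int main(void)
{
	struct shrinker_info *info = alloc_info(64);

	if (info)
		printf("map_nr_max = %d\n", info->map_nr_max);
	free(info);
	return 0;
}
```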
Hi Qi,

On 25.02.2023 11:18, Qi Zheng wrote:
> On 2023/2/23 21:27, Qi Zheng wrote:
>> To prepare for the subsequent lockless memcg slab shrink,
>> add a map_nr_max field to struct shrinker_info to record
>> its own real shrinker_nr_max.
>>
>> No functional changes.
>>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>
> I missed a Suggested-by here. Hi Kirill, can I add it?
>
> Suggested-by: Kirill Tkhai <tkhai@ya.ru>

Yes, feel free to add this tag.

There is a comment below.

>> [quoted patch snipped]
>>
>> -	for_each_set_bit(i, info->map, shrinker_nr_max) {
>> +	for_each_set_bit(i, info->map, info->map_nr_max) {
>>  		struct shrink_control sc = {
>>  			.gfp_mask = gfp_mask,
>>  			.nid = nid,

The patch as a whole won't work as expected. It won't ever call shrinkers
with ids in [round_down(shrinker_nr_max, BITS_PER_LONG) + 1,
shrinker_nr_max - 1].

Just replay the sequence in which we add new shrinkers:

1) We add shrinker #0:
     shrinker_nr_max = 0;

     prealloc_memcg_shrinker()
       id = 0;
       expand_shrinker_info(0)
         new_nr_max = 1;
         expand_one_shrinker_info(new_nr_max = 1)
           new->map_nr_max = 1;
         shrinker_nr_max = 1;

2) We add shrinker #1:
     prealloc_memcg_shrinker()
       id = 1;
       expand_shrinker_info(1)
         new_nr_max = 2;
         need_expand(2, 1) => false => expansion is skipped
         shrinker_nr_max = 2;

3) Then we call the shrinkers:
     shrink_slab_memcg()
       for_each_set_bit(i, info->map, 1 /* info->map_nr_max */) {
       } => shrinker #1 is ignored

I'd fix this patch with something like the below:

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9f895ca6216c..bb617a3871f1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -224,12 +224,6 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
 					 lockdep_is_held(&shrinker_rwsem));
 }
 
-static inline bool need_expand(int new_nr_max, int old_nr_max)
-{
-	return round_up(new_nr_max, BITS_PER_LONG) >
-	       round_up(old_nr_max, BITS_PER_LONG);
-}
-
 static int expand_one_shrinker_info(struct mem_cgroup *memcg,
 				    int map_size, int defer_size,
 				    int old_map_size, int old_defer_size,
@@ -247,9 +241,6 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
 	if (!old)
 		return 0;
 
-	if (!need_expand(new_nr_max, old->map_nr_max))
-		return 0;
-
 	new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid);
 	if (!new)
 		return -ENOMEM;
@@ -317,14 +308,11 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
 static int expand_shrinker_info(int new_id)
 {
 	int ret = 0;
-	int new_nr_max = new_id + 1;
+	int new_nr_max = round_up(new_id + 1, BITS_PER_LONG);
 	int map_size, defer_size = 0;
 	int old_map_size, old_defer_size = 0;
 	struct mem_cgroup *memcg;
 
-	if (!need_expand(new_nr_max, shrinker_nr_max))
-		goto out;
-
 	if (!root_mem_cgroup)
 		goto out;
 
@@ -359,9 +347,11 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 
 		rcu_read_lock();
 		info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
-		/* Pairs with smp mb in shrink_slab() */
-		smp_mb__before_atomic();
-		set_bit(shrinker_id, info->map);
+		if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
+			/* Pairs with smp mb in shrink_slab() */
+			smp_mb__before_atomic();
+			set_bit(shrinker_id, info->map);
+		}
 		rcu_read_unlock();
 	}
 }

(I also added a new check into set_shrinker_bit() for safety.)

Kirill
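To see concretely why need_expand() reports false in step 2 above: round_up() aligns both values to the same 64-bit word, so ids 1..63 never trigger an expansion. A minimal standalone demo (hypothetical test code reusing the kernel's power-of-two round_up() semantics; not part of the thread):

```c
#include <stdio.h>

#define BITS_PER_LONG 64	/* assuming a 64-bit build */
/* Power-of-two round_up(), same result as the kernel macro. */
#define round_up(x, y) ((((x) - 1) | ((y) - 1)) + 1)

static int need_expand(int new_nr_max, int old_nr_max)
{
	return round_up(new_nr_max, BITS_PER_LONG) >
	       round_up(old_nr_max, BITS_PER_LONG);
}

int main(void)
{
	/* Step 2 of the replay: the map holds 1 id, id #1 needs 2. */
	printf("round_up(1, 64) = %d\n", round_up(1, BITS_PER_LONG));
	printf("round_up(2, 64) = %d\n", round_up(2, BITS_PER_LONG));
	/* Both are 64, so no expansion happens and map_nr_max stays 1,
	 * while the global shrinker_nr_max is bumped to 2 anyway. */
	printf("need_expand(2, 1) = %d\n", need_expand(2, 1));
	return 0;
}
```

Kirill's fix sidesteps the mismatch by rounding new_nr_max up front, so map_nr_max and the allocated map size always agree.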
On 2023/2/25 23:14, Kirill Tkhai wrote:
> Hi Qi,
>
> On 25.02.2023 11:18, Qi Zheng wrote:
>> I missed a Suggested-by here. Hi Kirill, can I add it?
>>
>> Suggested-by: Kirill Tkhai <tkhai@ya.ru>
>
> Yes, feel free to add this tag.

Thanks.

> There is a comment below.
>
> [quoted patch and replay sequence snipped]
>
> 3) Then we call the shrinkers:
>      shrink_slab_memcg()
>        for_each_set_bit(i, info->map, 1 /* info->map_nr_max */) {
>        } => shrinker #1 is ignored

Oh, I got it.

> I'd fix this patch with something like the below:
>
> [Kirill's fix snipped; see the previous message]
>
> (I also added a new check into set_shrinker_bit() for safety.)
>
> Kirill

The fix looks good to me, will add it to the next version. :)

Thanks,
Qi
On 2023/2/25 23:14, Kirill Tkhai wrote:
>
> [earlier discussion snipped; see the previous messages]
>
> I'd fix this patch with something like the below:
>
> @@ -247,9 +241,6 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
>  	if (!old)
>  		return 0;
>
> -	if (!need_expand(new_nr_max, old->map_nr_max))
> -		return 0;
> -

Maybe we can keep this check. For example, if kvmalloc_node() failed
partway through the memcg walk last time, some shrinker_info may
already have been expanded, and those shrinker_info do not need to be
expanded again.

> [rest of the fix snipped]
>
> (I also added a new check into set_shrinker_bit() for safety.)
>
> Kirill
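A toy simulation of the partial-failure scenario described above (hypothetical userspace code, not from the thread; the plain >= comparison stands in for the round_up()-based need_expand() check):

```c
#include <stdio.h>

/*
 * Hypothetical userspace simulation, not kernel code: models
 * expand_shrinker_info() walking every memcg.  If the allocation for
 * one memcg fails, earlier memcgs already carry the new map_nr_max;
 * the early return (standing in for the need_expand() check Qi wants
 * to keep) lets the retry skip them instead of reallocating.
 */
struct info { int map_nr_max; };

static int expand_one(struct info *info, int new_nr_max, int simulate_enomem)
{
	if (info->map_nr_max >= new_nr_max)
		return 0;			/* already expanded last time */
	if (simulate_enomem)
		return -1;			/* kvmalloc_node() failure */
	info->map_nr_max = new_nr_max;		/* successful reallocation */
	return 0;
}

int main(void)
{
	struct info memcgs[3] = { {64}, {64}, {64} };
	int i;

	/* First attempt: the allocation for memcg #2 fails midway. */
	for (i = 0; i < 3; i++)
		if (expand_one(&memcgs[i], 128, i == 2))
			break;

	/* Retry: #0 and #1 hit the early return; only #2 is (re)done. */
	for (i = 0; i < 3; i++)
		expand_one(&memcgs[i], 128, 0);

	for (i = 0; i < 3; i++)
		printf("memcg #%d: map_nr_max = %d\n", i, memcgs[i].map_nr_max);
	return 0;
}
```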
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b6eda2ab205d..aa69ea98e2d8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -97,6 +97,7 @@ struct shrinker_info {
 	struct rcu_head rcu;
 	atomic_long_t *nr_deferred;
 	unsigned long *map;
+	int map_nr_max;
 };
 
 struct lruvec_stats_percpu {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c1c5e8b24b8..9f895ca6216c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -224,9 +224,16 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
 					 lockdep_is_held(&shrinker_rwsem));
 }
 
+static inline bool need_expand(int new_nr_max, int old_nr_max)
+{
+	return round_up(new_nr_max, BITS_PER_LONG) >
+	       round_up(old_nr_max, BITS_PER_LONG);
+}
+
 static int expand_one_shrinker_info(struct mem_cgroup *memcg,
 				    int map_size, int defer_size,
-				    int old_map_size, int old_defer_size)
+				    int old_map_size, int old_defer_size,
+				    int new_nr_max)
 {
 	struct shrinker_info *new, *old;
 	struct mem_cgroup_per_node *pn;
@@ -240,12 +247,16 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
 	if (!old)
 		return 0;
 
+	if (!need_expand(new_nr_max, old->map_nr_max))
+		return 0;
+
 	new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid);
 	if (!new)
 		return -ENOMEM;
 
 	new->nr_deferred = (atomic_long_t *)(new + 1);
 	new->map = (void *)new->nr_deferred + defer_size;
+	new->map_nr_max = new_nr_max;
 
 	/* map: set all old bits, clear all new bits */
 	memset(new->map, (int)0xff, old_map_size);
@@ -295,6 +306,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
 		}
 		info->nr_deferred = (atomic_long_t *)(info + 1);
 		info->map = (void *)info->nr_deferred + defer_size;
+		info->map_nr_max = shrinker_nr_max;
 		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
 	}
 	up_write(&shrinker_rwsem);
@@ -302,12 +314,6 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
 	return ret;
 }
 
-static inline bool need_expand(int nr_max)
-{
-	return round_up(nr_max, BITS_PER_LONG) >
-	       round_up(shrinker_nr_max, BITS_PER_LONG);
-}
-
 static int expand_shrinker_info(int new_id)
 {
 	int ret = 0;
@@ -316,7 +322,7 @@ static int expand_shrinker_info(int new_id)
 	int old_map_size, old_defer_size = 0;
 	struct mem_cgroup *memcg;
 
-	if (!need_expand(new_nr_max))
+	if (!need_expand(new_nr_max, shrinker_nr_max))
 		goto out;
 
 	if (!root_mem_cgroup)
@@ -332,7 +338,8 @@ static int expand_shrinker_info(int new_id)
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		ret = expand_one_shrinker_info(memcg, map_size, defer_size,
-					       old_map_size, old_defer_size);
+					       old_map_size, old_defer_size,
+					       new_nr_max);
 		if (ret) {
 			mem_cgroup_iter_break(NULL, memcg);
 			goto out;
@@ -432,7 +439,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
 	for_each_node(nid) {
 		child_info = shrinker_info_protected(memcg, nid);
 		parent_info = shrinker_info_protected(parent, nid);
-		for (i = 0; i < shrinker_nr_max; i++) {
+		for (i = 0; i < child_info->map_nr_max; i++) {
 			nr = atomic_long_read(&child_info->nr_deferred[i]);
 			atomic_long_add(nr, &parent_info->nr_deferred[i]);
 		}
@@ -899,7 +906,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 	if (unlikely(!info))
 		goto unlock;
 
-	for_each_set_bit(i, info->map, shrinker_nr_max) {
+	for_each_set_bit(i, info->map, info->map_nr_max) {
 		struct shrink_control sc = {
 			.gfp_mask = gfp_mask,
 			.nid = nid,
To prepare for the subsequent lockless memcg slab shrink, add a
map_nr_max field to struct shrinker_info to record its own real
shrinker_nr_max.

No functional changes.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 include/linux/memcontrol.h |  1 +
 mm/vmscan.c                | 29 ++++++++++++++++++-----------
 2 files changed, 19 insertions(+), 11 deletions(-)