diff mbox series

[v6,09/11] mm: vmscan: don't need allocate shrinker->nr_deferred for memcg aware shrinkers

Message ID 20210203172042.800474-10-shy828301@gmail.com (mailing list archive)
State New
Headers show
Series Make shrinker's nr_deferred memcg aware | expand

Commit Message

Yang Shi Feb. 3, 2021, 5:20 p.m. UTC
Now that nr_deferred is available at the per-memcg level for memcg-aware shrinkers, there is
no need to allocate shrinker->nr_deferred for such shrinkers anymore.

prealloc_memcg_shrinker() now returns -ENOSYS if !CONFIG_MEMCG or if memcg is disabled
on the kernel command line; in that case the shrinker's SHRINKER_MEMCG_AWARE flag is cleared.
This makes the implementation of this patch simpler.

Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Yang Shi <shy828301@gmail.com>
---
 mm/vmscan.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

Comments

Kirill Tkhai Feb. 4, 2021, 9:29 a.m. UTC | #1
On 03.02.2021 20:20, Yang Shi wrote:
> Now nr_deferred is available on per memcg level for memcg aware shrinkers, so don't need
> allocate shrinker->nr_deferred for such shrinkers anymore.
> 
> The prealloc_memcg_shrinker() would return -ENOSYS if !CONFIG_MEMCG or memcg is disabled
> by kernel command line, then shrinker's SHRINKER_MEMCG_AWARE flag would be cleared.
> This makes the implementation of this patch simpler.
> 
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> Signed-off-by: Yang Shi <shy828301@gmail.com>
> ---
>  mm/vmscan.c | 31 ++++++++++++++++---------------
>  1 file changed, 16 insertions(+), 15 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 545422d2aeec..20a35d26ae12 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -334,6 +334,9 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>  {
>  	int id, ret = -ENOMEM;
>  
> +	if (mem_cgroup_disabled())
> +		return -ENOSYS;
> +
>  	down_write(&shrinker_rwsem);
>  	/* This may call shrinker, so it must use down_read_trylock() */
>  	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
> @@ -414,7 +417,7 @@ static bool writeback_throttling_sane(struct scan_control *sc)
>  #else
>  static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>  {
> -	return 0;
> +	return -ENOSYS;
>  }
>  
>  static void unregister_memcg_shrinker(struct shrinker *shrinker)
> @@ -525,8 +528,18 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
>   */
>  int prealloc_shrinker(struct shrinker *shrinker)
>  {
> -	unsigned int size = sizeof(*shrinker->nr_deferred);
> +	unsigned int size;
> +	int err;
> +
> +	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
> +		err = prealloc_memcg_shrinker(shrinker);
> +		if (err != -ENOSYS)
> +			return err;
>  
> +		shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
> +	}
> +
> +	size = sizeof(*shrinker->nr_deferred);
>  	if (shrinker->flags & SHRINKER_NUMA_AWARE)
>  		size *= nr_node_ids;

This may sound surprising, but IIRC do_shrink_slab() may be called during early boot
*even before* root_mem_cgroup is allocated. AFAIR, I received a syzkaller crash report
because of this when I was implementing shrinker_maps.

This is a reason why we don't use shrinker_maps even when mem cgroup is not
disabled: we iterate over every shrinker on shrinker_list. See the check in shrink_slab():

	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))

Possibly we should do the same for nr_deferred: 1) always allocate shrinker->nr_deferred,
2) use shrinker->nr_deferred in count_nr_deferred() and set_nr_deferred().

>  
> @@ -534,26 +547,14 @@ int prealloc_shrinker(struct shrinker *shrinker)
>  	if (!shrinker->nr_deferred)
>  		return -ENOMEM;
>  
> -	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
> -		if (prealloc_memcg_shrinker(shrinker))
> -			goto free_deferred;
> -	}
>  
>  	return 0;
> -
> -free_deferred:
> -	kfree(shrinker->nr_deferred);
> -	shrinker->nr_deferred = NULL;
> -	return -ENOMEM;
>  }
>  
>  void free_prealloced_shrinker(struct shrinker *shrinker)
>  {
> -	if (!shrinker->nr_deferred)
> -		return;
> -
>  	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
> -		unregister_memcg_shrinker(shrinker);
> +		return unregister_memcg_shrinker(shrinker);
>  
>  	kfree(shrinker->nr_deferred);
>  	shrinker->nr_deferred = NULL;
>
Kirill Tkhai Feb. 4, 2021, 10:14 a.m. UTC | #2
On 04.02.2021 12:29, Kirill Tkhai wrote:
> On 03.02.2021 20:20, Yang Shi wrote:
>> Now nr_deferred is available on per memcg level for memcg aware shrinkers, so don't need
>> allocate shrinker->nr_deferred for such shrinkers anymore.
>>
>> The prealloc_memcg_shrinker() would return -ENOSYS if !CONFIG_MEMCG or memcg is disabled
>> by kernel command line, then shrinker's SHRINKER_MEMCG_AWARE flag would be cleared.
>> This makes the implementation of this patch simpler.
>>
>> Acked-by: Vlastimil Babka <vbabka@suse.cz>
>> Signed-off-by: Yang Shi <shy828301@gmail.com>
>> ---
>>  mm/vmscan.c | 31 ++++++++++++++++---------------
>>  1 file changed, 16 insertions(+), 15 deletions(-)
>>
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index 545422d2aeec..20a35d26ae12 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -334,6 +334,9 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>>  {
>>  	int id, ret = -ENOMEM;
>>  
>> +	if (mem_cgroup_disabled())
>> +		return -ENOSYS;
>> +
>>  	down_write(&shrinker_rwsem);
>>  	/* This may call shrinker, so it must use down_read_trylock() */
>>  	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
>> @@ -414,7 +417,7 @@ static bool writeback_throttling_sane(struct scan_control *sc)
>>  #else
>>  static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>>  {
>> -	return 0;
>> +	return -ENOSYS;
>>  }
>>  
>>  static void unregister_memcg_shrinker(struct shrinker *shrinker)
>> @@ -525,8 +528,18 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
>>   */
>>  int prealloc_shrinker(struct shrinker *shrinker)
>>  {
>> -	unsigned int size = sizeof(*shrinker->nr_deferred);
>> +	unsigned int size;
>> +	int err;
>> +
>> +	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
>> +		err = prealloc_memcg_shrinker(shrinker);
>> +		if (err != -ENOSYS)
>> +			return err;
>>  
>> +		shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
>> +	}
>> +
>> +	size = sizeof(*shrinker->nr_deferred);
>>  	if (shrinker->flags & SHRINKER_NUMA_AWARE)
>>  		size *= nr_node_ids;
> 
> This may sound surprisingly, but IIRC do_shrink_slab() may be called on early boot
> *even before* root_mem_cgroup is allocated. AFAIR, I received syzcaller crash report
> because of this, when I was implementing shrinker_maps.
> 
> This is a reason why we don't use shrinker_maps even in case of mem cgroup is not
> disabled: we iterate every shrinker of shrinker_list. See check in shrink_slab():
> 
> 	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
> 
> Possible, we should do the same for nr_deferred: 1)always allocate shrinker->nr_deferred,
> 2)use shrinker->nr_deferred in count_nr_deferred() and set_nr_deferred().

I looked through my mailbox, and I can't find that crash report or the conditions to reproduce it.

Hm, let's leave this as is, and rework it if such an early shrinker call turns out to still be
possible and a report comes in...

Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>

With only one nit:
 
>>  
>> @@ -534,26 +547,14 @@ int prealloc_shrinker(struct shrinker *shrinker)
>>  	if (!shrinker->nr_deferred)
>>  		return -ENOMEM;
>>  
>> -	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
>> -		if (prealloc_memcg_shrinker(shrinker))
>> -			goto free_deferred;
>> -	}
>>  
>>  	return 0;
>> -
>> -free_deferred:
>> -	kfree(shrinker->nr_deferred);
>> -	shrinker->nr_deferred = NULL;
>> -	return -ENOMEM;
>>  }
>>  
>>  void free_prealloced_shrinker(struct shrinker *shrinker)
>>  {
>> -	if (!shrinker->nr_deferred)
>> -		return;
>> -
>>  	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
>> -		unregister_memcg_shrinker(shrinker);
>> +		return unregister_memcg_shrinker(shrinker);

I've never seen a return statement that returns the result of a void function in the Linux kernel. I'm not sure this won't confuse people.

>>  
>>  	kfree(shrinker->nr_deferred);
>>  	shrinker->nr_deferred = NULL;
>>
>
Yang Shi Feb. 4, 2021, 5:32 p.m. UTC | #3
On Thu, Feb 4, 2021 at 2:14 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>
> On 04.02.2021 12:29, Kirill Tkhai wrote:
> > On 03.02.2021 20:20, Yang Shi wrote:
> >> Now nr_deferred is available on per memcg level for memcg aware shrinkers, so don't need
> >> allocate shrinker->nr_deferred for such shrinkers anymore.
> >>
> >> The prealloc_memcg_shrinker() would return -ENOSYS if !CONFIG_MEMCG or memcg is disabled
> >> by kernel command line, then shrinker's SHRINKER_MEMCG_AWARE flag would be cleared.
> >> This makes the implementation of this patch simpler.
> >>
> >> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> >> Signed-off-by: Yang Shi <shy828301@gmail.com>
> >> ---
> >>  mm/vmscan.c | 31 ++++++++++++++++---------------
> >>  1 file changed, 16 insertions(+), 15 deletions(-)
> >>
> >> diff --git a/mm/vmscan.c b/mm/vmscan.c
> >> index 545422d2aeec..20a35d26ae12 100644
> >> --- a/mm/vmscan.c
> >> +++ b/mm/vmscan.c
> >> @@ -334,6 +334,9 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
> >>  {
> >>      int id, ret = -ENOMEM;
> >>
> >> +    if (mem_cgroup_disabled())
> >> +            return -ENOSYS;
> >> +
> >>      down_write(&shrinker_rwsem);
> >>      /* This may call shrinker, so it must use down_read_trylock() */
> >>      id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
> >> @@ -414,7 +417,7 @@ static bool writeback_throttling_sane(struct scan_control *sc)
> >>  #else
> >>  static int prealloc_memcg_shrinker(struct shrinker *shrinker)
> >>  {
> >> -    return 0;
> >> +    return -ENOSYS;
> >>  }
> >>
> >>  static void unregister_memcg_shrinker(struct shrinker *shrinker)
> >> @@ -525,8 +528,18 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
> >>   */
> >>  int prealloc_shrinker(struct shrinker *shrinker)
> >>  {
> >> -    unsigned int size = sizeof(*shrinker->nr_deferred);
> >> +    unsigned int size;
> >> +    int err;
> >> +
> >> +    if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
> >> +            err = prealloc_memcg_shrinker(shrinker);
> >> +            if (err != -ENOSYS)
> >> +                    return err;
> >>
> >> +            shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
> >> +    }
> >> +
> >> +    size = sizeof(*shrinker->nr_deferred);
> >>      if (shrinker->flags & SHRINKER_NUMA_AWARE)
> >>              size *= nr_node_ids;
> >
> > This may sound surprisingly, but IIRC do_shrink_slab() may be called on early boot
> > *even before* root_mem_cgroup is allocated. AFAIR, I received syzcaller crash report
> > because of this, when I was implementing shrinker_maps.
> >
> > This is a reason why we don't use shrinker_maps even in case of mem cgroup is not
> > disabled: we iterate every shrinker of shrinker_list. See check in shrink_slab():
> >
> >       if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
> >
> > Possible, we should do the same for nr_deferred: 1)always allocate shrinker->nr_deferred,
> > 2)use shrinker->nr_deferred in count_nr_deferred() and set_nr_deferred().
>
> I looked over my mail box, and I can't find that crash report and conditions to reproduce.
>
> Hm, let's remain this as is, and we rework this in case of such early shrinker call is still
> possible, and there will be a report...

Sure. But I'm wondering how that could happen. On a very small machine?

>
> Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>
> With only nit:
>
> >>
> >> @@ -534,26 +547,14 @@ int prealloc_shrinker(struct shrinker *shrinker)
> >>      if (!shrinker->nr_deferred)
> >>              return -ENOMEM;
> >>
> >> -    if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
> >> -            if (prealloc_memcg_shrinker(shrinker))
> >> -                    goto free_deferred;
> >> -    }
> >>
> >>      return 0;
> >> -
> >> -free_deferred:
> >> -    kfree(shrinker->nr_deferred);
> >> -    shrinker->nr_deferred = NULL;
> >> -    return -ENOMEM;
> >>  }
> >>
> >>  void free_prealloced_shrinker(struct shrinker *shrinker)
> >>  {
> >> -    if (!shrinker->nr_deferred)
> >> -            return;
> >> -
> >>      if (shrinker->flags & SHRINKER_MEMCG_AWARE)
> >> -            unregister_memcg_shrinker(shrinker);
> >> +            return unregister_memcg_shrinker(shrinker);
>
> I've never seen return of void function in linux kernel. I'm not sure this won't confuse people.

Will fix in v7.

>
> >>
> >>      kfree(shrinker->nr_deferred);
> >>      shrinker->nr_deferred = NULL;
> >>
> >
>
>
Kirill Tkhai Feb. 5, 2021, 2:44 p.m. UTC | #4
On 04.02.2021 20:32, Yang Shi wrote:
> On Thu, Feb 4, 2021 at 2:14 AM Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
>>
>> On 04.02.2021 12:29, Kirill Tkhai wrote:
>>> On 03.02.2021 20:20, Yang Shi wrote:
>>>> Now nr_deferred is available on per memcg level for memcg aware shrinkers, so don't need
>>>> allocate shrinker->nr_deferred for such shrinkers anymore.
>>>>
>>>> The prealloc_memcg_shrinker() would return -ENOSYS if !CONFIG_MEMCG or memcg is disabled
>>>> by kernel command line, then shrinker's SHRINKER_MEMCG_AWARE flag would be cleared.
>>>> This makes the implementation of this patch simpler.
>>>>
>>>> Acked-by: Vlastimil Babka <vbabka@suse.cz>
>>>> Signed-off-by: Yang Shi <shy828301@gmail.com>
>>>> ---
>>>>  mm/vmscan.c | 31 ++++++++++++++++---------------
>>>>  1 file changed, 16 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>>>> index 545422d2aeec..20a35d26ae12 100644
>>>> --- a/mm/vmscan.c
>>>> +++ b/mm/vmscan.c
>>>> @@ -334,6 +334,9 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>>>>  {
>>>>      int id, ret = -ENOMEM;
>>>>
>>>> +    if (mem_cgroup_disabled())
>>>> +            return -ENOSYS;
>>>> +
>>>>      down_write(&shrinker_rwsem);
>>>>      /* This may call shrinker, so it must use down_read_trylock() */
>>>>      id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
>>>> @@ -414,7 +417,7 @@ static bool writeback_throttling_sane(struct scan_control *sc)
>>>>  #else
>>>>  static int prealloc_memcg_shrinker(struct shrinker *shrinker)
>>>>  {
>>>> -    return 0;
>>>> +    return -ENOSYS;
>>>>  }
>>>>
>>>>  static void unregister_memcg_shrinker(struct shrinker *shrinker)
>>>> @@ -525,8 +528,18 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
>>>>   */
>>>>  int prealloc_shrinker(struct shrinker *shrinker)
>>>>  {
>>>> -    unsigned int size = sizeof(*shrinker->nr_deferred);
>>>> +    unsigned int size;
>>>> +    int err;
>>>> +
>>>> +    if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
>>>> +            err = prealloc_memcg_shrinker(shrinker);
>>>> +            if (err != -ENOSYS)
>>>> +                    return err;
>>>>
>>>> +            shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
>>>> +    }
>>>> +
>>>> +    size = sizeof(*shrinker->nr_deferred);
>>>>      if (shrinker->flags & SHRINKER_NUMA_AWARE)
>>>>              size *= nr_node_ids;
>>>
>>> This may sound surprisingly, but IIRC do_shrink_slab() may be called on early boot
>>> *even before* root_mem_cgroup is allocated. AFAIR, I received syzcaller crash report
>>> because of this, when I was implementing shrinker_maps.
>>>
>>> This is a reason why we don't use shrinker_maps even in case of mem cgroup is not
>>> disabled: we iterate every shrinker of shrinker_list. See check in shrink_slab():
>>>
>>>       if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
>>>
>>> Possible, we should do the same for nr_deferred: 1)always allocate shrinker->nr_deferred,
>>> 2)use shrinker->nr_deferred in count_nr_deferred() and set_nr_deferred().
>>
>> I looked over my mail box, and I can't find that crash report and conditions to reproduce.
>>
>> Hm, let's remain this as is, and we rework this in case of such early shrinker call is still
>> possible, and there will be a report...
> 
> Sure. But I'm wondering how that could happen. On a very small machine?

Sorry, but I don't remember. Maybe the case you mentioned. Maybe some self-tests at node boot...

>>
>> Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>>
>> With only nit:
>>
>>>>
>>>> @@ -534,26 +547,14 @@ int prealloc_shrinker(struct shrinker *shrinker)
>>>>      if (!shrinker->nr_deferred)
>>>>              return -ENOMEM;
>>>>
>>>> -    if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
>>>> -            if (prealloc_memcg_shrinker(shrinker))
>>>> -                    goto free_deferred;
>>>> -    }
>>>>
>>>>      return 0;
>>>> -
>>>> -free_deferred:
>>>> -    kfree(shrinker->nr_deferred);
>>>> -    shrinker->nr_deferred = NULL;
>>>> -    return -ENOMEM;
>>>>  }
>>>>
>>>>  void free_prealloced_shrinker(struct shrinker *shrinker)
>>>>  {
>>>> -    if (!shrinker->nr_deferred)
>>>> -            return;
>>>> -
>>>>      if (shrinker->flags & SHRINKER_MEMCG_AWARE)
>>>> -            unregister_memcg_shrinker(shrinker);
>>>> +            return unregister_memcg_shrinker(shrinker);
>>
>> I've never seen return of void function in linux kernel. I'm not sure this won't confuse people.
> 
> Will fix in v7.
> 
>>
>>>>
>>>>      kfree(shrinker->nr_deferred);
>>>>      shrinker->nr_deferred = NULL;
>>>>
>>>
>>
>>
diff mbox series

Patch

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 545422d2aeec..20a35d26ae12 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -334,6 +334,9 @@  static int prealloc_memcg_shrinker(struct shrinker *shrinker)
 {
 	int id, ret = -ENOMEM;
 
+	if (mem_cgroup_disabled())
+		return -ENOSYS;
+
 	down_write(&shrinker_rwsem);
 	/* This may call shrinker, so it must use down_read_trylock() */
 	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
@@ -414,7 +417,7 @@  static bool writeback_throttling_sane(struct scan_control *sc)
 #else
 static int prealloc_memcg_shrinker(struct shrinker *shrinker)
 {
-	return 0;
+	return -ENOSYS;
 }
 
 static void unregister_memcg_shrinker(struct shrinker *shrinker)
@@ -525,8 +528,18 @@  unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
  */
 int prealloc_shrinker(struct shrinker *shrinker)
 {
-	unsigned int size = sizeof(*shrinker->nr_deferred);
+	unsigned int size;
+	int err;
+
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
+		err = prealloc_memcg_shrinker(shrinker);
+		if (err != -ENOSYS)
+			return err;
 
+		shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
+	}
+
+	size = sizeof(*shrinker->nr_deferred);
 	if (shrinker->flags & SHRINKER_NUMA_AWARE)
 		size *= nr_node_ids;
 
@@ -534,26 +547,14 @@  int prealloc_shrinker(struct shrinker *shrinker)
 	if (!shrinker->nr_deferred)
 		return -ENOMEM;
 
-	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
-		if (prealloc_memcg_shrinker(shrinker))
-			goto free_deferred;
-	}
 
 	return 0;
-
-free_deferred:
-	kfree(shrinker->nr_deferred);
-	shrinker->nr_deferred = NULL;
-	return -ENOMEM;
 }
 
 void free_prealloced_shrinker(struct shrinker *shrinker)
 {
-	if (!shrinker->nr_deferred)
-		return;
-
 	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
-		unregister_memcg_shrinker(shrinker);
+		return unregister_memcg_shrinker(shrinker);
 
 	kfree(shrinker->nr_deferred);
 	shrinker->nr_deferred = NULL;