diff mbox series

[v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr

Message ID 20221108035232.87180-1-zhengqi.arch@bytedance.com (mailing list archive)
State New
Headers show
Series [v2] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr | expand

Commit Message

Qi Zheng Nov. 8, 2022, 3:52 a.m. UTC
When we specify __GFP_NOWARN, we only expect that no warnings
will be issued for current caller. But in the __should_failslab()
and __should_fail_alloc_page(), the local GFP flags alter the
global {failslab|fail_page_alloc}.attr, which is persistent and
shared by all tasks. This is not what we expected, let's fix it.

Cc: stable@vger.kernel.org
Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/

 Changelog in v1 -> v2:
  - add comment for __should_failslab() and __should_fail_alloc_page()
    (suggested by Jason)

 include/linux/fault-inject.h |  7 +++++--
 lib/fault-inject.c           | 14 +++++++++-----
 mm/failslab.c                | 12 ++++++++++--
 mm/page_alloc.c              |  7 +++++--
 4 files changed, 29 insertions(+), 11 deletions(-)

Comments

Wei Yongjun Nov. 8, 2022, 8:44 a.m. UTC | #1
Hi Zheng Qi,

On 2022/11/8 11:52, Qi Zheng wrote:
> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
> 
> Cc: stable@vger.kernel.org
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
> 
>  Changelog in v1 -> v2:
>   - add comment for __should_failslab() and __should_fail_alloc_page()
>     (suggested by Jason)
> 
>  include/linux/fault-inject.h |  7 +++++--
>  lib/fault-inject.c           | 14 +++++++++-----
>  mm/failslab.c                | 12 ++++++++++--
>  mm/page_alloc.c              |  7 +++++--
>  4 files changed, 29 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index 9f6e25467844..444236dadcf0 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -20,7 +20,6 @@ struct fault_attr {
>  	atomic_t space;
>  	unsigned long verbose;
>  	bool task_filter;
> -	bool no_warn;
>  	unsigned long stacktrace_depth;
>  	unsigned long require_start;
>  	unsigned long require_end;
> @@ -32,6 +31,10 @@ struct fault_attr {
>  	struct dentry *dname;
>  };
>  
> +enum fault_flags {
> +	FAULT_NOWARN =	1 << 0,
> +};
> +
>  #define FAULT_ATTR_INITIALIZER {					\
>  		.interval = 1,						\
>  		.times = ATOMIC_INIT(1),				\
> @@ -40,11 +43,11 @@ struct fault_attr {
>  		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
>  		.verbose = 2,						\
>  		.dname = NULL,						\
> -		.no_warn = false,					\

How about keep no_warn attr as it be, and export it to user?

When testing with fault injection, and each fault will print an backtrace.
but not all of the testsuit can tell us which one is fault injection
message or other is a real warning/crash like syzkaller do.

In my case, to make things simple, we usually used a regex to detect whether
wanring/error happend. So we disabled the slab/page fault warning message by
default, and only enable it when debug real issue.

Regards,


>  	}
>  
>  #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>  int setup_fault_attr(struct fault_attr *attr, char *str);
> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
>  bool should_fail(struct fault_attr *attr, ssize_t size);
>  
>  #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
> index 4b8fafce415c..5971f7c3e49e 100644
> --- a/lib/fault-inject.c
> +++ b/lib/fault-inject.c
> @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>  
>  static void fail_dump(struct fault_attr *attr)
>  {
> -	if (attr->no_warn)
> -		return;
> -
>  	if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>  		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>  		       "name %pd, interval %lu, probability %lu, "
> @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>   * http://www.nongnu.org/failmalloc/
>   */
>  
> -bool should_fail(struct fault_attr *attr, ssize_t size)
> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>  {
>  	bool stack_checked = false;
>  
> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>  		return false;
>  
>  fail:
> -	fail_dump(attr);
> +	if (!(flags & FAULT_NOWARN))
> +		fail_dump(attr);
>  
>  	if (atomic_read(&attr->times) != -1)
>  		atomic_dec_not_zero(&attr->times);
>  
>  	return true;
>  }
> +EXPORT_SYMBOL_GPL(should_fail_ex);
> +
> +bool should_fail(struct fault_attr *attr, ssize_t size)
> +{
> +	return should_fail_ex(attr, size, 0);
> +}
>  EXPORT_SYMBOL_GPL(should_fail);
>  
>  #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
> diff --git a/mm/failslab.c b/mm/failslab.c
> index 58df9789f1d2..ffc420c0e767 100644
> --- a/mm/failslab.c
> +++ b/mm/failslab.c
> @@ -16,6 +16,8 @@ static struct {
>  
>  bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  {
> +	int flags = 0;
> +
>  	/* No fault-injection for bootstrap cache */
>  	if (unlikely(s == kmem_cache))
>  		return false;
> @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  	if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>  		return false;
>  
> +	/*
> +	 * In some cases, it expects to specify __GFP_NOWARN
> +	 * to avoid printing any information(not just a warning),
> +	 * thus avoiding deadlocks. See commit 6b9dbedbe349 for
> +	 * details.
> +	 */
>  	if (gfpflags & __GFP_NOWARN)
> -		failslab.attr.no_warn = true;
> +		flags |= FAULT_NOWARN;
>  
> -	return should_fail(&failslab.attr, s->object_size);
> +	return should_fail_ex(&failslab.attr, s->object_size, flags);
>  }
>  
>  static int __init setup_failslab(char *str)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 7192ded44ad0..cb6fe715d983 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
>  
>  static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>  {
> +	int flags = 0;
> +
>  	if (order < fail_page_alloc.min_order)
>  		return false;
>  	if (gfp_mask & __GFP_NOFAIL)
> @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>  			(gfp_mask & __GFP_DIRECT_RECLAIM))
>  		return false;
>  
> +	/* See comment in __should_failslab() */
>  	if (gfp_mask & __GFP_NOWARN)
> -		fail_page_alloc.attr.no_warn = true;
> +		flags |= FAULT_NOWARN;
>  
> -	return should_fail(&fail_page_alloc.attr, 1 << order);
> +	return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
>  }
>  
>  #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
Qi Zheng Nov. 8, 2022, 8:58 a.m. UTC | #2
On 2022/11/8 16:44, Wei Yongjun wrote:
> Hi Zheng Qi,
> 
> On 2022/11/8 11:52, Qi Zheng wrote:
>> When we specify __GFP_NOWARN, we only expect that no warnings
>> will be issued for current caller. But in the __should_failslab()
>> and __should_fail_alloc_page(), the local GFP flags alter the
>> global {failslab|fail_page_alloc}.attr, which is persistent and
>> shared by all tasks. This is not what we expected, let's fix it.
>>
>> Cc: stable@vger.kernel.org
>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>> Reported-by: Dmitry Vyukov <dvyukov@google.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
>>
>>   Changelog in v1 -> v2:
>>    - add comment for __should_failslab() and __should_fail_alloc_page()
>>      (suggested by Jason)
>>
>>   include/linux/fault-inject.h |  7 +++++--
>>   lib/fault-inject.c           | 14 +++++++++-----
>>   mm/failslab.c                | 12 ++++++++++--
>>   mm/page_alloc.c              |  7 +++++--
>>   4 files changed, 29 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>> index 9f6e25467844..444236dadcf0 100644
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -20,7 +20,6 @@ struct fault_attr {
>>   	atomic_t space;
>>   	unsigned long verbose;
>>   	bool task_filter;
>> -	bool no_warn;
>>   	unsigned long stacktrace_depth;
>>   	unsigned long require_start;
>>   	unsigned long require_end;
>> @@ -32,6 +31,10 @@ struct fault_attr {
>>   	struct dentry *dname;
>>   };
>>   
>> +enum fault_flags {
>> +	FAULT_NOWARN =	1 << 0,
>> +};
>> +
>>   #define FAULT_ATTR_INITIALIZER {					\
>>   		.interval = 1,						\
>>   		.times = ATOMIC_INIT(1),				\
>> @@ -40,11 +43,11 @@ struct fault_attr {
>>   		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
>>   		.verbose = 2,						\
>>   		.dname = NULL,						\
>> -		.no_warn = false,					\
> 
> How about keep no_warn attr as it be, and export it to user?
> 
> When testing with fault injection, and each fault will print an backtrace.
> but not all of the testsuit can tell us which one is fault injection
> message or other is a real warning/crash like syzkaller do.
> 
> In my case, to make things simple, we usually used a regex to detect whether
> wanring/error happend. So we disabled the slab/page fault warning message by
> default, and only enable it when debug real issue.

So you want to set/clear this no_warn attr through the procfs or sysfs
interface, so that you can easily disable/enable the slab/page fault
warning message from the user mode. Right?

Seems reasonable to me. Anyone else has an opinion on this? If it is
really needed, I can do it later.

Thanks,
Qi

> 
> Regards,
> 
> 
>>   	}
>>   
>>   #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>>   int setup_fault_attr(struct fault_attr *attr, char *str);
>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
>>   bool should_fail(struct fault_attr *attr, ssize_t size);
>>   
>>   #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>> index 4b8fafce415c..5971f7c3e49e 100644
>> --- a/lib/fault-inject.c
>> +++ b/lib/fault-inject.c
>> @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>>   
>>   static void fail_dump(struct fault_attr *attr)
>>   {
>> -	if (attr->no_warn)
>> -		return;
>> -
>>   	if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>>   		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>>   		       "name %pd, interval %lu, probability %lu, "
>> @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>>    * http://www.nongnu.org/failmalloc/
>>    */
>>   
>> -bool should_fail(struct fault_attr *attr, ssize_t size)
>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>>   {
>>   	bool stack_checked = false;
>>   
>> @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>>   		return false;
>>   
>>   fail:
>> -	fail_dump(attr);
>> +	if (!(flags & FAULT_NOWARN))
>> +		fail_dump(attr);
>>   
>>   	if (atomic_read(&attr->times) != -1)
>>   		atomic_dec_not_zero(&attr->times);
>>   
>>   	return true;
>>   }
>> +EXPORT_SYMBOL_GPL(should_fail_ex);
>> +
>> +bool should_fail(struct fault_attr *attr, ssize_t size)
>> +{
>> +	return should_fail_ex(attr, size, 0);
>> +}
>>   EXPORT_SYMBOL_GPL(should_fail);
>>   
>>   #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>> diff --git a/mm/failslab.c b/mm/failslab.c
>> index 58df9789f1d2..ffc420c0e767 100644
>> --- a/mm/failslab.c
>> +++ b/mm/failslab.c
>> @@ -16,6 +16,8 @@ static struct {
>>   
>>   bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>   {
>> +	int flags = 0;
>> +
>>   	/* No fault-injection for bootstrap cache */
>>   	if (unlikely(s == kmem_cache))
>>   		return false;
>> @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>   	if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>>   		return false;
>>   
>> +	/*
>> +	 * In some cases, it expects to specify __GFP_NOWARN
>> +	 * to avoid printing any information(not just a warning),
>> +	 * thus avoiding deadlocks. See commit 6b9dbedbe349 for
>> +	 * details.
>> +	 */
>>   	if (gfpflags & __GFP_NOWARN)
>> -		failslab.attr.no_warn = true;
>> +		flags |= FAULT_NOWARN;
>>   
>> -	return should_fail(&failslab.attr, s->object_size);
>> +	return should_fail_ex(&failslab.attr, s->object_size, flags);
>>   }
>>   
>>   static int __init setup_failslab(char *str)
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 7192ded44ad0..cb6fe715d983 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
>>   
>>   static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>   {
>> +	int flags = 0;
>> +
>>   	if (order < fail_page_alloc.min_order)
>>   		return false;
>>   	if (gfp_mask & __GFP_NOFAIL)
>> @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>   			(gfp_mask & __GFP_DIRECT_RECLAIM))
>>   		return false;
>>   
>> +	/* See comment in __should_failslab() */
>>   	if (gfp_mask & __GFP_NOWARN)
>> -		fail_page_alloc.attr.no_warn = true;
>> +		flags |= FAULT_NOWARN;
>>   
>> -	return should_fail(&fail_page_alloc.attr, 1 << order);
>> +	return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
>>   }
>>   
>>   #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
Wei Yongjun Nov. 8, 2022, 9:32 a.m. UTC | #3
On 2022/11/8 16:58, Qi Zheng wrote:
> 
> 
> On 2022/11/8 16:44, Wei Yongjun wrote:
>> Hi Zheng Qi,
>>
>> On 2022/11/8 11:52, Qi Zheng wrote:
>>> When we specify __GFP_NOWARN, we only expect that no warnings
>>> will be issued for current caller. But in the __should_failslab()
>>> and __should_fail_alloc_page(), the local GFP flags alter the
>>> global {failslab|fail_page_alloc}.attr, which is persistent and
>>> shared by all tasks. This is not what we expected, let's fix it.
>>>
>>> Cc: stable@vger.kernel.org
>>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>>> Reported-by: Dmitry Vyukov <dvyukov@google.com>
>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>> ---
>>>   v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
>>>
>>>   Changelog in v1 -> v2:
>>>    - add comment for __should_failslab() and __should_fail_alloc_page()
>>>      (suggested by Jason)
>>>
>>>   include/linux/fault-inject.h |  7 +++++--
>>>   lib/fault-inject.c           | 14 +++++++++-----
>>>   mm/failslab.c                | 12 ++++++++++--
>>>   mm/page_alloc.c              |  7 +++++--
>>>   4 files changed, 29 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>>> index 9f6e25467844..444236dadcf0 100644
>>> --- a/include/linux/fault-inject.h
>>> +++ b/include/linux/fault-inject.h
>>> @@ -20,7 +20,6 @@ struct fault_attr {
>>>       atomic_t space;
>>>       unsigned long verbose;
>>>       bool task_filter;
>>> -    bool no_warn;
>>>       unsigned long stacktrace_depth;
>>>       unsigned long require_start;
>>>       unsigned long require_end;
>>> @@ -32,6 +31,10 @@ struct fault_attr {
>>>       struct dentry *dname;
>>>   };
>>>   +enum fault_flags {
>>> +    FAULT_NOWARN =    1 << 0,
>>> +};
>>> +
>>>   #define FAULT_ATTR_INITIALIZER {                    \
>>>           .interval = 1,                        \
>>>           .times = ATOMIC_INIT(1),                \
>>> @@ -40,11 +43,11 @@ struct fault_attr {
>>>           .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,    \
>>>           .verbose = 2,                        \
>>>           .dname = NULL,                        \
>>> -        .no_warn = false,                    \
>>
>> How about keep no_warn attr as it be, and export it to user?
>>
>> When testing with fault injection, and each fault will print an backtrace.
>> but not all of the testsuit can tell us which one is fault injection
>> message or other is a real warning/crash like syzkaller do.
>>
>> In my case, to make things simple, we usually used a regex to detect whether
>> wanring/error happend. So we disabled the slab/page fault warning message by
>> default, and only enable it when debug real issue.
> 
> So you want to set/clear this no_warn attr through the procfs or sysfs
> interface, so that you can easily disable/enable the slab/page fault
> warning message from the user mode. Right?

Yes, just like:

echo 1 > /sys/kernel/debug/failslab/no_warn  #disable message
echo 0 > /sys/kernel/debug/failslab/no_warn  #enable message

Regards
Wei Yongjun

> 
> Seems reasonable to me. Anyone else has an opinion on this? If it is
> really needed, I can do it later.
> 
> Thanks,
> Qi
> 
>>
>> Regards,
>>
>>
>>>       }
>>>     #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>>>   int setup_fault_attr(struct fault_attr *attr, char *str);
>>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
>>>   bool should_fail(struct fault_attr *attr, ssize_t size);
>>>     #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>>> index 4b8fafce415c..5971f7c3e49e 100644
>>> --- a/lib/fault-inject.c
>>> +++ b/lib/fault-inject.c
>>> @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>>>     static void fail_dump(struct fault_attr *attr)
>>>   {
>>> -    if (attr->no_warn)
>>> -        return;
>>> -
>>>       if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>>>           printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>>>                  "name %pd, interval %lu, probability %lu, "
>>> @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>>>    * http://www.nongnu.org/failmalloc/
>>>    */
>>>   -bool should_fail(struct fault_attr *attr, ssize_t size)
>>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>>>   {
>>>       bool stack_checked = false;
>>>   @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>>>           return false;
>>>     fail:
>>> -    fail_dump(attr);
>>> +    if (!(flags & FAULT_NOWARN))
>>> +        fail_dump(attr);
>>>         if (atomic_read(&attr->times) != -1)
>>>           atomic_dec_not_zero(&attr->times);
>>>         return true;
>>>   }
>>> +EXPORT_SYMBOL_GPL(should_fail_ex);
>>> +
>>> +bool should_fail(struct fault_attr *attr, ssize_t size)
>>> +{
>>> +    return should_fail_ex(attr, size, 0);
>>> +}
>>>   EXPORT_SYMBOL_GPL(should_fail);
>>>     #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>> diff --git a/mm/failslab.c b/mm/failslab.c
>>> index 58df9789f1d2..ffc420c0e767 100644
>>> --- a/mm/failslab.c
>>> +++ b/mm/failslab.c
>>> @@ -16,6 +16,8 @@ static struct {
>>>     bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>   {
>>> +    int flags = 0;
>>> +
>>>       /* No fault-injection for bootstrap cache */
>>>       if (unlikely(s == kmem_cache))
>>>           return false;
>>> @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>       if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>>>           return false;
>>>   +    /*
>>> +     * In some cases, it expects to specify __GFP_NOWARN
>>> +     * to avoid printing any information(not just a warning),
>>> +     * thus avoiding deadlocks. See commit 6b9dbedbe349 for
>>> +     * details.
>>> +     */
>>>       if (gfpflags & __GFP_NOWARN)
>>> -        failslab.attr.no_warn = true;
>>> +        flags |= FAULT_NOWARN;
>>>   -    return should_fail(&failslab.attr, s->object_size);
>>> +    return should_fail_ex(&failslab.attr, s->object_size, flags);
>>>   }
>>>     static int __init setup_failslab(char *str)
>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>> index 7192ded44ad0..cb6fe715d983 100644
>>> --- a/mm/page_alloc.c
>>> +++ b/mm/page_alloc.c
>>> @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
>>>     static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>>   {
>>> +    int flags = 0;
>>> +
>>>       if (order < fail_page_alloc.min_order)
>>>           return false;
>>>       if (gfp_mask & __GFP_NOFAIL)
>>> @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>>               (gfp_mask & __GFP_DIRECT_RECLAIM))
>>>           return false;
>>>   +    /* See comment in __should_failslab() */
>>>       if (gfp_mask & __GFP_NOWARN)
>>> -        fail_page_alloc.attr.no_warn = true;
>>> +        flags |= FAULT_NOWARN;
>>>   -    return should_fail(&fail_page_alloc.attr, 1 << order);
>>> +    return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
>>>   }
>>>     #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>
Qi Zheng Nov. 8, 2022, 9:45 a.m. UTC | #4
On 2022/11/8 17:32, Wei Yongjun wrote:
> 
> 
> On 2022/11/8 16:58, Qi Zheng wrote:
>>
>>
>> On 2022/11/8 16:44, Wei Yongjun wrote:
>>> Hi Zheng Qi,
>>>
>>> On 2022/11/8 11:52, Qi Zheng wrote:
>>>> When we specify __GFP_NOWARN, we only expect that no warnings
>>>> will be issued for current caller. But in the __should_failslab()
>>>> and __should_fail_alloc_page(), the local GFP flags alter the
>>>> global {failslab|fail_page_alloc}.attr, which is persistent and
>>>> shared by all tasks. This is not what we expected, let's fix it.
>>>>
>>>> Cc: stable@vger.kernel.org
>>>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>>>> Reported-by: Dmitry Vyukov <dvyukov@google.com>
>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>> ---
>>>>    v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
>>>>
>>>>    Changelog in v1 -> v2:
>>>>     - add comment for __should_failslab() and __should_fail_alloc_page()
>>>>       (suggested by Jason)
>>>>
>>>>    include/linux/fault-inject.h |  7 +++++--
>>>>    lib/fault-inject.c           | 14 +++++++++-----
>>>>    mm/failslab.c                | 12 ++++++++++--
>>>>    mm/page_alloc.c              |  7 +++++--
>>>>    4 files changed, 29 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>>>> index 9f6e25467844..444236dadcf0 100644
>>>> --- a/include/linux/fault-inject.h
>>>> +++ b/include/linux/fault-inject.h
>>>> @@ -20,7 +20,6 @@ struct fault_attr {
>>>>        atomic_t space;
>>>>        unsigned long verbose;
>>>>        bool task_filter;
>>>> -    bool no_warn;
>>>>        unsigned long stacktrace_depth;
>>>>        unsigned long require_start;
>>>>        unsigned long require_end;
>>>> @@ -32,6 +31,10 @@ struct fault_attr {
>>>>        struct dentry *dname;
>>>>    };
>>>>    +enum fault_flags {
>>>> +    FAULT_NOWARN =    1 << 0,
>>>> +};
>>>> +
>>>>    #define FAULT_ATTR_INITIALIZER {                    \
>>>>            .interval = 1,                        \
>>>>            .times = ATOMIC_INIT(1),                \
>>>> @@ -40,11 +43,11 @@ struct fault_attr {
>>>>            .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,    \
>>>>            .verbose = 2,                        \
>>>>            .dname = NULL,                        \
>>>> -        .no_warn = false,                    \
>>>
>>> How about keep no_warn attr as it be, and export it to user?
>>>
>>> When testing with fault injection, and each fault will print an backtrace.
>>> but not all of the testsuit can tell us which one is fault injection
>>> message or other is a real warning/crash like syzkaller do.
>>>
>>> In my case, to make things simple, we usually used a regex to detect whether
>>> wanring/error happend. So we disabled the slab/page fault warning message by
>>> default, and only enable it when debug real issue.
>>
>> So you want to set/clear this no_warn attr through the procfs or sysfs
>> interface, so that you can easily disable/enable the slab/page fault
>> warning message from the user mode. Right?
> 
> Yes, just like:
> 
> echo 1 > /sys/kernel/debug/failslab/no_warn  #disable message
> echo 0 > /sys/kernel/debug/failslab/no_warn  #enable message

Got it. Let's wait for the other people's comments and suggestions. :)

> 
> Regards
> Wei Yongjun
> 
>>
>> Seems reasonable to me. Anyone else has an opinion on this? If it is
>> really needed, I can do it later.
>>
>> Thanks,
>> Qi
>>
>>>
>>> Regards,
>>>
>>>
>>>>        }
>>>>      #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>>>>    int setup_fault_attr(struct fault_attr *attr, char *str);
>>>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
>>>>    bool should_fail(struct fault_attr *attr, ssize_t size);
>>>>      #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>>>> index 4b8fafce415c..5971f7c3e49e 100644
>>>> --- a/lib/fault-inject.c
>>>> +++ b/lib/fault-inject.c
>>>> @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
>>>>      static void fail_dump(struct fault_attr *attr)
>>>>    {
>>>> -    if (attr->no_warn)
>>>> -        return;
>>>> -
>>>>        if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
>>>>            printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
>>>>                   "name %pd, interval %lu, probability %lu, "
>>>> @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>>>>     * http://www.nongnu.org/failmalloc/
>>>>     */
>>>>    -bool should_fail(struct fault_attr *attr, ssize_t size)
>>>> +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
>>>>    {
>>>>        bool stack_checked = false;
>>>>    @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
>>>>            return false;
>>>>      fail:
>>>> -    fail_dump(attr);
>>>> +    if (!(flags & FAULT_NOWARN))
>>>> +        fail_dump(attr);
>>>>          if (atomic_read(&attr->times) != -1)
>>>>            atomic_dec_not_zero(&attr->times);
>>>>          return true;
>>>>    }
>>>> +EXPORT_SYMBOL_GPL(should_fail_ex);
>>>> +
>>>> +bool should_fail(struct fault_attr *attr, ssize_t size)
>>>> +{
>>>> +    return should_fail_ex(attr, size, 0);
>>>> +}
>>>>    EXPORT_SYMBOL_GPL(should_fail);
>>>>      #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>>> diff --git a/mm/failslab.c b/mm/failslab.c
>>>> index 58df9789f1d2..ffc420c0e767 100644
>>>> --- a/mm/failslab.c
>>>> +++ b/mm/failslab.c
>>>> @@ -16,6 +16,8 @@ static struct {
>>>>      bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>>    {
>>>> +    int flags = 0;
>>>> +
>>>>        /* No fault-injection for bootstrap cache */
>>>>        if (unlikely(s == kmem_cache))
>>>>            return false;
>>>> @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>>>        if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
>>>>            return false;
>>>>    +    /*
>>>> +     * In some cases, it expects to specify __GFP_NOWARN
>>>> +     * to avoid printing any information(not just a warning),
>>>> +     * thus avoiding deadlocks. See commit 6b9dbedbe349 for
>>>> +     * details.
>>>> +     */
>>>>        if (gfpflags & __GFP_NOWARN)
>>>> -        failslab.attr.no_warn = true;
>>>> +        flags |= FAULT_NOWARN;
>>>>    -    return should_fail(&failslab.attr, s->object_size);
>>>> +    return should_fail_ex(&failslab.attr, s->object_size, flags);
>>>>    }
>>>>      static int __init setup_failslab(char *str)
>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>> index 7192ded44ad0..cb6fe715d983 100644
>>>> --- a/mm/page_alloc.c
>>>> +++ b/mm/page_alloc.c
>>>> @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
>>>>      static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>>>    {
>>>> +    int flags = 0;
>>>> +
>>>>        if (order < fail_page_alloc.min_order)
>>>>            return false;
>>>>        if (gfp_mask & __GFP_NOFAIL)
>>>> @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>>>>                (gfp_mask & __GFP_DIRECT_RECLAIM))
>>>>            return false;
>>>>    +    /* See comment in __should_failslab() */
>>>>        if (gfp_mask & __GFP_NOWARN)
>>>> -        fail_page_alloc.attr.no_warn = true;
>>>> +        flags |= FAULT_NOWARN;
>>>>    -    return should_fail(&fail_page_alloc.attr, 1 << order);
>>>> +    return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
>>>>    }
>>>>      #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
>>
Jason Gunthorpe Nov. 8, 2022, 12:04 p.m. UTC | #5
On Tue, Nov 08, 2022 at 05:32:52PM +0800, Wei Yongjun wrote:
> > So you want to set/clear this no_warn attr through the procfs or sysfs
> > interface, so that you can easily disable/enable the slab/page fault
> > warning message from the user mode. Right?
> 
> Yes, just like:
> 
> echo 1 > /sys/kernel/debug/failslab/no_warn  #disable message
> echo 0 > /sys/kernel/debug/failslab/no_warn  #enable message

You can already do that:

 echo 0 > /sys/kernel/debug/failslab/verbose  #disable message

Jason
Akinobu Mita Nov. 8, 2022, 5:36 p.m. UTC | #6
2022年11月8日(火) 12:52 Qi Zheng <zhengqi.arch@bytedance.com>:
>
> When we specify __GFP_NOWARN, we only expect that no warnings
> will be issued for current caller. But in the __should_failslab()
> and __should_fail_alloc_page(), the local GFP flags alter the
> global {failslab|fail_page_alloc}.attr, which is persistent and
> shared by all tasks. This is not what we expected, let's fix it.
>
> Cc: stable@vger.kernel.org
> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
>
>  Changelog in v1 -> v2:
>   - add comment for __should_failslab() and __should_fail_alloc_page()
>     (suggested by Jason)

Looks good.

Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
Wei Yongjun Nov. 9, 2022, 3:57 a.m. UTC | #7
On 2022/11/8 20:04, Jason Gunthorpe wrote:
> On Tue, Nov 08, 2022 at 05:32:52PM +0800, Wei Yongjun wrote:
>>> So you want to set/clear this no_warn attr through the procfs or sysfs
>>> interface, so that you can easily disable/enable the slab/page fault
>>> warning message from the user mode. Right?
>>
>> Yes, just like:
>>
>> echo 1 > /sys/kernel/debug/failslab/no_warn  #disable message
>> echo 0 > /sys/kernel/debug/failslab/no_warn  #enable message
> 
> You can already do that:
> 
>  echo 0 > /sys/kernel/debug/failslab/verbose  #disable message

Got it, thanks.

Wei Yongjun
Qi Zheng Nov. 14, 2022, 3:59 a.m. UTC | #8
On 2022/11/9 01:36, Akinobu Mita wrote:
> 2022年11月8日(火) 12:52 Qi Zheng <zhengqi.arch@bytedance.com>:
>>
>> When we specify __GFP_NOWARN, we only expect that no warnings
>> will be issued for current caller. But in the __should_failslab()
>> and __should_fail_alloc_page(), the local GFP flags alter the
>> global {failslab|fail_page_alloc}.attr, which is persistent and
>> shared by all tasks. This is not what we expected, let's fix it.
>>
>> Cc: stable@vger.kernel.org
>> Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN")
>> Reported-by: Dmitry Vyukov <dvyukov@google.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.com/
>>
>>   Changelog in v1 -> v2:
>>    - add comment for __should_failslab() and __should_fail_alloc_page()
>>      (suggested by Jason)
> 
> Looks good.
> 
> Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>

Thanks. And hi Andrew, seems no action left for me, can this patch
be applied to mm-unstable tree for testing? :)
diff mbox series

Patch

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 9f6e25467844..444236dadcf0 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -20,7 +20,6 @@  struct fault_attr {
 	atomic_t space;
 	unsigned long verbose;
 	bool task_filter;
-	bool no_warn;
 	unsigned long stacktrace_depth;
 	unsigned long require_start;
 	unsigned long require_end;
@@ -32,6 +31,10 @@  struct fault_attr {
 	struct dentry *dname;
 };
 
+enum fault_flags {
+	FAULT_NOWARN =	1 << 0,
+};
+
 #define FAULT_ATTR_INITIALIZER {					\
 		.interval = 1,						\
 		.times = ATOMIC_INIT(1),				\
@@ -40,11 +43,11 @@  struct fault_attr {
 		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
 		.verbose = 2,						\
 		.dname = NULL,						\
-		.no_warn = false,					\
 	}
 
 #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
 int setup_fault_attr(struct fault_attr *attr, char *str);
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags);
 bool should_fail(struct fault_attr *attr, ssize_t size);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 4b8fafce415c..5971f7c3e49e 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -41,9 +41,6 @@  EXPORT_SYMBOL_GPL(setup_fault_attr);
 
 static void fail_dump(struct fault_attr *attr)
 {
-	if (attr->no_warn)
-		return;
-
 	if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) {
 		printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n"
 		       "name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@  static inline bool fail_stacktrace(struct fault_attr *attr)
  * http://www.nongnu.org/failmalloc/
  */
 
-bool should_fail(struct fault_attr *attr, ssize_t size)
+bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 {
 	bool stack_checked = false;
 
@@ -152,13 +149,20 @@  bool should_fail(struct fault_attr *attr, ssize_t size)
 		return false;
 
 fail:
-	fail_dump(attr);
+	if (!(flags & FAULT_NOWARN))
+		fail_dump(attr);
 
 	if (atomic_read(&attr->times) != -1)
 		atomic_dec_not_zero(&attr->times);
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(should_fail_ex);
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+	return should_fail_ex(attr, size, 0);
+}
 EXPORT_SYMBOL_GPL(should_fail);
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
diff --git a/mm/failslab.c b/mm/failslab.c
index 58df9789f1d2..ffc420c0e767 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -16,6 +16,8 @@  static struct {
 
 bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
+	int flags = 0;
+
 	/* No fault-injection for bootstrap cache */
 	if (unlikely(s == kmem_cache))
 		return false;
@@ -30,10 +32,16 @@  bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 	if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB))
 		return false;
 
+	/*
+	 * In some cases, it expects to specify __GFP_NOWARN
+	 * to avoid printing any information(not just a warning),
+	 * thus avoiding deadlocks. See commit 6b9dbedbe349 for
+	 * details.
+	 */
 	if (gfpflags & __GFP_NOWARN)
-		failslab.attr.no_warn = true;
+		flags |= FAULT_NOWARN;
 
-	return should_fail(&failslab.attr, s->object_size);
+	return should_fail_ex(&failslab.attr, s->object_size, flags);
 }
 
 static int __init setup_failslab(char *str)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7192ded44ad0..cb6fe715d983 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3902,6 +3902,8 @@  __setup("fail_page_alloc=", setup_fail_page_alloc);
 
 static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 {
+	int flags = 0;
+
 	if (order < fail_page_alloc.min_order)
 		return false;
 	if (gfp_mask & __GFP_NOFAIL)
@@ -3912,10 +3914,11 @@  static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 			(gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
+	/* See comment in __should_failslab() */
 	if (gfp_mask & __GFP_NOWARN)
-		fail_page_alloc.attr.no_warn = true;
+		flags |= FAULT_NOWARN;
 
-	return should_fail(&fail_page_alloc.attr, 1 << order);
+	return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
 }
 
 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS