diff mbox series

drm/i915/gem: Suppress oom warning in favour of ENOMEM to userspace

Message ID 20240626143318.11600-1-nirmoy.das@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/gem: Suppress oom warning in favour of ENOMEM to userspace | expand

Commit Message

Nirmoy Das June 26, 2024, 2:33 p.m. UTC
We report object allocation failures to userspace with ENOMEM
so add __GFP_NOWARN to remove superfluous oom warnings.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/i915/i915_scatterlist.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Rodrigo Vivi June 26, 2024, 3:24 p.m. UTC | #1
On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:
> We report object allocation failures to userspace with ENOMEM
> so add __GFP_NOWARN to remove superfluous oom warnings.
> 
> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936
> Cc: Andi Shyti <andi.shyti@linux.intel.com>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_scatterlist.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
> index e93d2538f298..4d830740946d 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.c
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.c
> @@ -90,7 +90,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>  
>  	GEM_BUG_ON(!max_segment);
>  
> -	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
> +	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
>  	if (!rsgt)
>  		return ERR_PTR(-ENOMEM);

is it really safe?
I don't believe we can guarantee a good fallback plan here if allocation fails.
__i915_refct_sgt_init
might end up in a null dereference, no?!

>  
> @@ -104,7 +104,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>  	}
>  
>  	if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages),
> -			   GFP_KERNEL)) {
> +			   GFP_KERNEL | __GFP_NOWARN)) {
>  		i915_refct_sgt_put(rsgt);
>  		return ERR_PTR(-ENOMEM);
>  	}
> @@ -178,7 +178,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>  	GEM_BUG_ON(list_empty(blocks));
>  	GEM_BUG_ON(!max_segment);
>  
> -	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
> +	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
>  	if (!rsgt)
>  		return ERR_PTR(-ENOMEM);
>  
> @@ -190,7 +190,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>  		return ERR_PTR(-E2BIG);
>  	}
>  
> -	if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL)) {
> +	if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL | __GFP_NOWARN)) {
>  		i915_refct_sgt_put(rsgt);
>  		return ERR_PTR(-ENOMEM);
>  	}
> -- 
> 2.42.0
>
Nirmoy Das June 26, 2024, 3:36 p.m. UTC | #2
Hi Rodrigo,

On 6/26/2024 5:24 PM, Rodrigo Vivi wrote:
> On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:
>> We report object allocation failures to userspace with ENOMEM
>> so add __GFP_NOWARN to remove superfluous oom warnings.
>>
>> Closes:https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936
>> Cc: Andi Shyti<andi.shyti@linux.intel.com>
>> Signed-off-by: Nirmoy Das<nirmoy.das@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_scatterlist.c | 8 ++++----
>>   1 file changed, 4 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
>> index e93d2538f298..4d830740946d 100644
>> --- a/drivers/gpu/drm/i915/i915_scatterlist.c
>> +++ b/drivers/gpu/drm/i915/i915_scatterlist.c
>> @@ -90,7 +90,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>>   
>>   	GEM_BUG_ON(!max_segment);
>>   
>> -	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
>> +	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
>>   	if (!rsgt)
>>   		return ERR_PTR(-ENOMEM);
> is it really safe?
> I don't believe we can guarantee a good fallback plan here if allocation fails.
> __i915_refct_sgt_init
> might end up in a null dereference, no?!

The kernel currently returns ENOMEM and also prints an OOM warning stack trace.
With __GFP_NOWARN, the OOM warning stack trace won't appear in dmesg, but
userspace will still get ENOMEM as expected.

Let me know if I understood your question correctly.

Regards,

Nirmoy

>
>>   
>> @@ -104,7 +104,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>>   	}
>>   
>>   	if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages),
>> -			   GFP_KERNEL)) {
>> +			   GFP_KERNEL | __GFP_NOWARN)) {
>>   		i915_refct_sgt_put(rsgt);
>>   		return ERR_PTR(-ENOMEM);
>>   	}
>> @@ -178,7 +178,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>>   	GEM_BUG_ON(list_empty(blocks));
>>   	GEM_BUG_ON(!max_segment);
>>   
>> -	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
>> +	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
>>   	if (!rsgt)
>>   		return ERR_PTR(-ENOMEM);
>>   
>> @@ -190,7 +190,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>>   		return ERR_PTR(-E2BIG);
>>   	}
>>   
>> -	if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL)) {
>> +	if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL | __GFP_NOWARN)) {
>>   		i915_refct_sgt_put(rsgt);
>>   		return ERR_PTR(-ENOMEM);
>>   	}
>> -- 
>> 2.42.0
>>
Rodrigo Vivi June 26, 2024, 3:50 p.m. UTC | #3
On Wed, Jun 26, 2024 at 05:36:43PM +0200, Nirmoy Das wrote:
>    Hi Rodrigo,                                                                  
>                                                                                 
>    On 6/26/2024 5:24 PM, Rodrigo Vivi wrote:                                    
>                                                                                 
>    On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:                  
>                                                                                 
>    >We report object allocation failures to userspace with ENOMEM               
>    >so add __GFP_NOWARN to remove superfluous oom warnings.                     
>                                                                                 
>    >Closes: [1]https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936     
>    >Cc: Andi Shyti [2]<andi.shyti@linux.intel.com>                              
>    >Signed-off-by: Nirmoy Das [3]<nirmoy.das@intel.com>                         
>    >---                                                                         
>    > drivers/gpu/drm/i915/i915_scatterlist.c | 8 ++++----                       
>    > 1 file changed, 4 insertions(+), 4 deletions(-)                            
>                                                                                 
>    >diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
>    >index e93d2538f298..4d830740946d 100644                                     
>    >--- a/drivers/gpu/drm/i915/i915_scatterlist.c                               
>    >+++ b/drivers/gpu/drm/i915/i915_scatterlist.c                               
>    >@@ -90,7 +90,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>    >                                                                            
>    >        GEM_BUG_ON(!max_segment);                                           
>    >                                                                            
>    >-       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);                          
>    >+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);           
>    >        if (!rsgt)                                                          
>    >                return ERR_PTR(-ENOMEM);                                    
>                                                                                 
>    is it really safe?                                                           
>    I don't believe we can guarantee a good fallback plan here if allocation fails.
>    __i915_refct_sgt_init                                                        
>    might end up in a null dereference, no?!                                     
>                                                                                 
>    Kernel is now returning  ENOMEM and also throwing a oom warning stack.       
>    With __GFP_NOWARN                                                            
>                                                                                 
>    the oom warning stack won't be there in the dmesg but userspace will still   
>    get ENOMEM as expected.                                                      

Doh! I had misunderstood the flag. Thanks for the confirmation.

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

BTW, what email client have you been using recently?
It is hard to parse your responses lately. Please check that it is really
sending/replying in text-only mode.

>                                                                                 
>    Let me know if got your question correctly.                                  
>                                                                                 
>    Regards,                                                                     
>                                                                                 
>    Nirmoy                                                                       
>                                                                                 
>                                                                                 
>                                                                                 
>    >                                                                            
>    >@@ -104,7 +104,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>    >        }                                                                   
>    >                                                                            
>    >        if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), 
>    >-                          GFP_KERNEL)) {                                   
>    >+                          GFP_KERNEL | __GFP_NOWARN)) {                    
>    >                i915_refct_sgt_put(rsgt);                                   
>    >                return ERR_PTR(-ENOMEM);                                    
>    >        }                                                                   
>    >@@ -178,7 +178,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>    >        GEM_BUG_ON(list_empty(blocks));                                     
>    >        GEM_BUG_ON(!max_segment);                                           
>    >                                                                            
>    >-       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);                          
>    >+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);           
>    >        if (!rsgt)                                                          
>    >                return ERR_PTR(-ENOMEM);                                    
>    >                                                                            
>    >@@ -190,7 +190,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>    >                return ERR_PTR(-E2BIG);                                     
>    >        }                                                                   
>    >                                                                            
>    >-       if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL)) {            
>    >+       if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL | __GFP_NOWARN)) {
>    >                i915_refct_sgt_put(rsgt);                                   
>    >                return ERR_PTR(-ENOMEM);                                    
>    >        }                                                                   
>    >--                                                                          
>    >2.42.0                                                                      
> 
> References
> 
>    Visible links
>    1. https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936
>    2. mailto:andi.shyti@linux.intel.com
>    3. mailto:nirmoy.das@intel.com
Nirmoy Das June 26, 2024, 9:07 p.m. UTC | #4
Hi Rodrigo,

On 6/26/2024 5:50 PM, Rodrigo Vivi wrote:
> On Wed, Jun 26, 2024 at 05:36:43PM +0200, Nirmoy Das wrote:
>>     Hi Rodrigo,
>>                                                                                  
>>     On 6/26/2024 5:24 PM, Rodrigo Vivi wrote:
>>                                                                                  
>>     On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:
>>                                                                                  
>>     >We report object allocation failures to userspace with ENOMEM
>>     >so add __GFP_NOWARN to remove superfluous oom warnings.
>>                                                                                  
>>     >Closes: [1]https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936
>>     >Cc: Andi Shyti [2]<andi.shyti@linux.intel.com>
>>     >Signed-off-by: Nirmoy Das [3]<nirmoy.das@intel.com>
>>     >---
>>     > drivers/gpu/drm/i915/i915_scatterlist.c | 8 ++++----
>>     > 1 file changed, 4 insertions(+), 4 deletions(-)
>>                                                                                  
>>     >diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
>>     >index e93d2538f298..4d830740946d 100644
>>     >--- a/drivers/gpu/drm/i915/i915_scatterlist.c
>>     >+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
>>     >@@ -90,7 +90,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>>     >
>>     >        GEM_BUG_ON(!max_segment);
>>     >
>>     >-       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
>>     >+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
>>     >        if (!rsgt)
>>     >                return ERR_PTR(-ENOMEM);
>>                                                                                  
>>     is it really safe?
>>     I don't believe we can guarantee a good fallback plan here if allocation fails.
>>     __i915_refct_sgt_init
>>     might end up in a null dereference, no?!
>>                                                                                  
>>     Kernel is now returning  ENOMEM and also throwing a oom warning stack.
>>     With __GFP_NOWARN
>>                                                                                  
>>     the oom warning stack won't be there in the dmesg but userspace will still
>>     get ENOMEM as expected.
> doh! I had missunderstand the flag. Thanks for the confirmation.
>
> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>
> BTW, what email clients are you using recently?

Using the same client, Thunderbird.


> it is hard to parse your responses lately. Please check if it is really
> sending/replying as text-only mode.

Thanks for notifying me. Maybe a recent update changed some settings. I
will check.


Nirmoy

>
>>                                                                                  
>>     Let me know if got your question correctly.
>>                                                                                  
>>     Regards,
>>                                                                                  
>>     Nirmoy
>>                                                                                  
>>                                                                                  
>>                                                                                  
>>     >
>>     >@@ -104,7 +104,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
>>     >        }
>>     >
>>     >        if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages),
>>     >-                          GFP_KERNEL)) {
>>     >+                          GFP_KERNEL | __GFP_NOWARN)) {
>>     >                i915_refct_sgt_put(rsgt);
>>     >                return ERR_PTR(-ENOMEM);
>>     >        }
>>     >@@ -178,7 +178,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>>     >        GEM_BUG_ON(list_empty(blocks));
>>     >        GEM_BUG_ON(!max_segment);
>>     >
>>     >-       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
>>     >+       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
>>     >        if (!rsgt)
>>     >                return ERR_PTR(-ENOMEM);
>>     >
>>     >@@ -190,7 +190,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
>>     >                return ERR_PTR(-E2BIG);
>>     >        }
>>     >
>>     >-       if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL)) {
>>     >+       if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL | __GFP_NOWARN)) {
>>     >                i915_refct_sgt_put(rsgt);
>>     >                return ERR_PTR(-ENOMEM);
>>     >        }
>>     >--
>>     >2.42.0
>>
>> References
>>
>>     Visible links
>>     1. https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/4936
>>     2. mailto:andi.shyti@linux.intel.com
>>     3. mailto:nirmoy.das@intel.com
Andi Shyti June 27, 2024, 10:04 a.m. UTC | #5
Hi Nirmoy,

On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:
> We report object allocation failures to userspace with ENOMEM
> so add __GFP_NOWARN to remove superfluous oom warnings.

I think this should be the default behavior. ENOMEM doesn't
necessarily mean that there is a kernel failure. Most of the time
we just run out of memory, deal with it :-)

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>

Thanks,
Andi
Nirmoy Das June 27, 2024, 4:56 p.m. UTC | #6
Hi Andi,

On 6/27/2024 12:04 PM, Andi Shyti wrote:
> Hi Nirmoy,
>
> On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:
>> We report object allocation failures to userspace with ENOMEM
>> so add __GFP_NOWARN to remove superfluous oom warnings.
> I think this should be the default behavior.
Yes, when drivers handle the ENOMEM situation, which is the case for the
i915/gem code.
>   ENOMEM doesn't
> necessarily mean that there is a kernel failure. Most of the time
> we just run out of memory, deal with it :-)
>
> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>

Thanks!


>
> Thanks,
> Andi
Andi Shyti June 27, 2024, 10:36 p.m. UTC | #7
Hi Nirmoy,

On Thu, Jun 27, 2024 at 06:56:53PM +0200, Nirmoy Das wrote:
> On 6/27/2024 12:04 PM, Andi Shyti wrote:
> > On Wed, Jun 26, 2024 at 04:33:18PM +0200, Nirmoy Das wrote:
> > > We report object allocation failures to userspace with ENOMEM
> > > so add __GFP_NOWARN to remove superfluous oom warnings.
> > I think this should be the default behavior.
> Yes, when drivers handle ENOMEM situation which is the case for i915/gem
> code
> >   ENOMEM doesn't
> > necessarily mean that there is a kernel failure. Most of the time
> > we just run out of memory, deal with it :-)
> > 
> > Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
> 
> Thanks!

while at it... merged in

drm-intel-gt-next

Andi
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
index e93d2538f298..4d830740946d 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -90,7 +90,7 @@  struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
 
 	GEM_BUG_ON(!max_segment);
 
-	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
 	if (!rsgt)
 		return ERR_PTR(-ENOMEM);
 
@@ -104,7 +104,7 @@  struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
 	}
 
 	if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages),
-			   GFP_KERNEL)) {
+			   GFP_KERNEL | __GFP_NOWARN)) {
 		i915_refct_sgt_put(rsgt);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -178,7 +178,7 @@  struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
 	GEM_BUG_ON(list_empty(blocks));
 	GEM_BUG_ON(!max_segment);
 
-	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL | __GFP_NOWARN);
 	if (!rsgt)
 		return ERR_PTR(-ENOMEM);
 
@@ -190,7 +190,7 @@  struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
 		return ERR_PTR(-E2BIG);
 	}
 
-	if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL)) {
+	if (sg_alloc_table(st, PFN_UP(res->size), GFP_KERNEL | __GFP_NOWARN)) {
 		i915_refct_sgt_put(rsgt);
 		return ERR_PTR(-ENOMEM);
 	}