drm/i915: Fix page cleanup on DMA remap failure

Message ID 20250116135636.410164-1-bgeffon@google.com (mailing list archive)
State New
Series: drm/i915: Fix page cleanup on DMA remap failure

Commit Message

Brian Geffon Jan. 16, 2025, 1:56 p.m. UTC
When converting to folios, the cleanup path of shmem_get_pages() was
missed. When a DMA remap fails and the max segment size is greater than
PAGE_SIZE, it will retry the remap with a PAGE_SIZE segment size. The
cleanup code isn't properly using the folio APIs and as a result isn't
handling compound pages correctly.

Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
Signed-off-by: Brian Geffon <bgeffon@google.com>
Suggested-by: Tomasz Figa <tfiga@google.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)
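
For context on the compound-page problem: calling put_page() on every page
of a large folio drops the folio reference once per constituent page, so
the folio ends up over-released. A folio-aware cleanup drops the reference
only once per folio. A minimal sketch of that idea (illustrative only, not
the exact code in the patch below):

	struct sgt_iter sgt_iter;
	struct folio *last = NULL;
	struct page *page;

	for_each_sgt_page(page, sgt_iter, st) {
		struct folio *folio = page_folio(page);

		/* skip pages belonging to a folio we already released */
		if (folio == last)
			continue;
		last = folio;
		folio_put(folio);
	}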

Comments

Ville Syrjala Jan. 16, 2025, 2:24 p.m. UTC | #1
On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> When converting to folios the cleanup path of shmem_get_pages() was
> missed. When a DMA remap fails and the max segment size is greater than
> PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> size. The cleanup code isn't properly using the folio apis and as a
> result isn't handling compound pages correctly.
> 
> Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> Suggested-by: Tomasz Figa <tfiga@google.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index fe69f2c8527d..02ddab5bf5c0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
>  	struct folio *last = NULL;
>  	struct page *page;
>  
> -	mapping_clear_unevictable(mapping);
> -

The asymmetry here between the alloc vs. free is a bit annoying.
Maybe we can just keep this here? 

Or if avoiding the ping-pong actually matters in the gtt prepare
error case, then maybe we should rename this guy into
__shmem_sg_free_table() without the mapping_clear_unevictable()
and wrap it in a higher level shmem_sg_free_table() that does
everything?
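
A minimal sketch of that split, assuming the current function body is simply
renamed and the parameters stay as they are (names follow the suggestion
above, not existing code):

	static void __shmem_sg_free_table(struct sg_table *st,
					  struct address_space *mapping,
					  bool dirty, bool backup)
	{
		/* current shmem_sg_free_table() body, minus the
		 * mapping_clear_unevictable() call
		 */
	}

	void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
				 bool dirty, bool backup)
	{
		mapping_clear_unevictable(mapping);
		__shmem_sg_free_table(st, mapping, dirty, backup);
	}

The retry path in shmem_get_pages() could then call __shmem_sg_free_table()
directly to keep the mapping unevictable while it retries.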

>  	folio_batch_init(&fbatch);
>  	for_each_sgt_page(page, sgt_iter, st) {
>  		struct folio *folio = page_folio(page);
> @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
>  	return 0;
>  err_sg:
>  	sg_mark_end(sg);
> +	mapping_clear_unevictable(mapping);
>  	if (sg != st->sgl) {
>  		shmem_sg_free_table(st, mapping, false, false);
>  	} else {
> -		mapping_clear_unevictable(mapping);
>  		sg_free_table(st);
>  	}
>  
> @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>  	struct address_space *mapping = obj->base.filp->f_mapping;
>  	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
>  	struct sg_table *st;
> -	struct sgt_iter sgt_iter;
> -	struct page *page;
>  	int ret;
>  
>  	/*
> @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>  		 * for PAGE_SIZE chunks instead may be helpful.
>  		 */
>  		if (max_segment > PAGE_SIZE) {
> -			for_each_sgt_page(page, sgt_iter, st)
> -				put_page(page);
> -			sg_free_table(st);
> +			/* Leave the mapping unevictable while we retry */
> +			shmem_sg_free_table(st, mapping, false, false);
>  			kfree(st);
>  
>  			max_segment = PAGE_SIZE;
> @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>  	return 0;
>  
>  err_pages:
> +	mapping_clear_unevictable(mapping);
>  	shmem_sg_free_table(st, mapping, false, false);
>  	/*
>  	 * shmemfs first checks if there is enough memory to allocate the page
> @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
>  	if (i915_gem_object_needs_bit17_swizzle(obj))
>  		i915_gem_object_save_bit_17_swizzle(obj, pages);
>  
> +	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
>  	shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
>  			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
>  	kfree(pages);
> -- 
> 2.48.0.rc2.279.g1de40edade-goog
Brian Geffon Jan. 16, 2025, 2:36 p.m. UTC | #2
On Thu, Jan 16, 2025 at 9:24 AM Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> > When converting to folios the cleanup path of shmem_get_pages() was
> > missed. When a DMA remap fails and the max segment size is greater than
> > PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> > size. The cleanup code isn't properly using the folio apis and as a
> > result isn't handling compound pages correctly.
> >
> > Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> > Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > Suggested-by: Tomasz Figa <tfiga@google.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
> >  1 file changed, 5 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > index fe69f2c8527d..02ddab5bf5c0 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
> >       struct folio *last = NULL;
> >       struct page *page;
> >
> > -     mapping_clear_unevictable(mapping);
> > -
>
> The assymmetry here between the alloc vs. free is a bit annoying.
> Maybe we can just keep this here?

If you want, I think this can also be fixed by something like the
following.
Ultimately we don't want to call put_page() on non-head pages of a
compound page. What do you think? If you like this better, I can test
and mail a v2.

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index fe69f2c8527d..b79cd396e878 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -239,8 +239,14 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
                 * for PAGE_SIZE chunks instead may be helpful.
                 */
                if (max_segment > PAGE_SIZE) {
-                       for_each_sgt_page(page, sgt_iter, st)
+                       struct folio *last = NULL;
+                       for_each_sgt_page(page, sgt_iter, st) {
+                               struct folio *folio = page_folio(page);
+                               if (folio == last)
+                                       continue;
+                               last = folio;
                                put_page(page);
+                       }
                        sg_free_table(st);
                        kfree(st);


--
2.48.0.rc2.279.g1de40edade-goog


>
> Or if avoiding the ping-pong actually mattes in the gtt prepare
> error case, then maybe we should rename this guy into
> __shmem_sg_free_table() without the mapping_clear_unevictable()
> and wrap it in a higher level shmem_sg_free_table() that does
> everything?
>
> >       folio_batch_init(&fbatch);
> >       for_each_sgt_page(page, sgt_iter, st) {
> >               struct folio *folio = page_folio(page);
> > @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
> >       return 0;
> >  err_sg:
> >       sg_mark_end(sg);
> > +     mapping_clear_unevictable(mapping);
> >       if (sg != st->sgl) {
> >               shmem_sg_free_table(st, mapping, false, false);
> >       } else {
> > -             mapping_clear_unevictable(mapping);
> >               sg_free_table(st);
> >       }
> >
> > @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >       struct address_space *mapping = obj->base.filp->f_mapping;
> >       unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
> >       struct sg_table *st;
> > -     struct sgt_iter sgt_iter;
> > -     struct page *page;
> >       int ret;
> >
> >       /*
> > @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >                * for PAGE_SIZE chunks instead may be helpful.
> >                */
> >               if (max_segment > PAGE_SIZE) {
> > -                     for_each_sgt_page(page, sgt_iter, st)
> > -                             put_page(page);
> > -                     sg_free_table(st);
> > +                     /* Leave the mapping unevictable while we retry */
> > +                     shmem_sg_free_table(st, mapping, false, false);
> >                       kfree(st);
> >
> >                       max_segment = PAGE_SIZE;
> > @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >       return 0;
> >
> >  err_pages:
> > +     mapping_clear_unevictable(mapping);
> >       shmem_sg_free_table(st, mapping, false, false);
> >       /*
> >        * shmemfs first checks if there is enough memory to allocate the page
> > @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
> >       if (i915_gem_object_needs_bit17_swizzle(obj))
> >               i915_gem_object_save_bit_17_swizzle(obj, pages);
> >
> > +     mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
> >       shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
> >                           obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> >       kfree(pages);
> > --
> > 2.48.0.rc2.279.g1de40edade-goog
>
> --
> Ville Syrjälä
> Intel
Ville Syrjala Jan. 16, 2025, 2:37 p.m. UTC | #3
On Thu, Jan 16, 2025 at 04:24:26PM +0200, Ville Syrjälä wrote:
> On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> > When converting to folios the cleanup path of shmem_get_pages() was
> > missed. When a DMA remap fails and the max segment size is greater than
> > PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> > size. The cleanup code isn't properly using the folio apis and as a
> > result isn't handling compound pages correctly.
> > 
> > Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> > Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > Suggested-by: Tomasz Figa <tfiga@google.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
> >  1 file changed, 5 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > index fe69f2c8527d..02ddab5bf5c0 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
> >  	struct folio *last = NULL;
> >  	struct page *page;
> >  
> > -	mapping_clear_unevictable(mapping);
> > -
> 
> The assymmetry here between the alloc vs. free is a bit annoying.
> Maybe we can just keep this here? 

Actually, I guess it's a bit more than just annoying since
shmem_sg_free_table() is actually used from outside i915_gem_shmem.c
as well.

> 
> Or if avoiding the ping-pong actually mattes in the gtt prepare
> error case, then maybe we should rename this guy into
> __shmem_sg_free_table() without the mapping_clear_unevictable()
> and wrap it in a higher level shmem_sg_free_table() that does
> everything?
> 
> >  	folio_batch_init(&fbatch);
> >  	for_each_sgt_page(page, sgt_iter, st) {
> >  		struct folio *folio = page_folio(page);
> > @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
> >  	return 0;
> >  err_sg:
> >  	sg_mark_end(sg);
> > +	mapping_clear_unevictable(mapping);
> >  	if (sg != st->sgl) {
> >  		shmem_sg_free_table(st, mapping, false, false);
> >  	} else {
> > -		mapping_clear_unevictable(mapping);
> >  		sg_free_table(st);
> >  	}
> >  
> > @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >  	struct address_space *mapping = obj->base.filp->f_mapping;
> >  	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
> >  	struct sg_table *st;
> > -	struct sgt_iter sgt_iter;
> > -	struct page *page;
> >  	int ret;
> >  
> >  	/*
> > @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >  		 * for PAGE_SIZE chunks instead may be helpful.
> >  		 */
> >  		if (max_segment > PAGE_SIZE) {
> > -			for_each_sgt_page(page, sgt_iter, st)
> > -				put_page(page);
> > -			sg_free_table(st);
> > +			/* Leave the mapping unevictable while we retry */
> > +			shmem_sg_free_table(st, mapping, false, false);
> >  			kfree(st);
> >  
> >  			max_segment = PAGE_SIZE;
> > @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >  	return 0;
> >  
> >  err_pages:
> > +	mapping_clear_unevictable(mapping);
> >  	shmem_sg_free_table(st, mapping, false, false);
> >  	/*
> >  	 * shmemfs first checks if there is enough memory to allocate the page
> > @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
> >  	if (i915_gem_object_needs_bit17_swizzle(obj))
> >  		i915_gem_object_save_bit_17_swizzle(obj, pages);
> >  
> > +	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
> >  	shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
> >  			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> >  	kfree(pages);
> > -- 
> > 2.48.0.rc2.279.g1de40edade-goog
> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjala Jan. 16, 2025, 2:39 p.m. UTC | #4
On Thu, Jan 16, 2025 at 09:36:42AM -0500, Brian Geffon wrote:
> On Thu, Jan 16, 2025 at 9:24 AM Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
> >
> > On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> > > When converting to folios the cleanup path of shmem_get_pages() was
> > > missed. When a DMA remap fails and the max segment size is greater than
> > > PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> > > size. The cleanup code isn't properly using the folio apis and as a
> > > result isn't handling compound pages correctly.
> > >
> > > Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> > > Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> > > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > > Suggested-by: Tomasz Figa <tfiga@google.com>
> > > ---
> > >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
> > >  1 file changed, 5 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > > index fe69f2c8527d..02ddab5bf5c0 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > > @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
> > >       struct folio *last = NULL;
> > >       struct page *page;
> > >
> > > -     mapping_clear_unevictable(mapping);
> > > -
> >
> > The assymmetry here between the alloc vs. free is a bit annoying.
> > Maybe we can just keep this here?
> 
> If you want, I think this can also be fixed by something like the
> following I believe.
> Ultimately we don't want to put page on non-head pages in a compound
> page. What do you think? If you like this better I can test and mail a v2.

I think having it all in one place would be nicer, if only to avoid
similar oversights in the future.

> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index fe69f2c8527d..b79cd396e878 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -239,8 +239,14 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>                  * for PAGE_SIZE chunks instead may be helpful.
>                  */
>                 if (max_segment > PAGE_SIZE) {
> -                       for_each_sgt_page(page, sgt_iter, st)
> +                       struct folio *last = NULL;
> +                       for_each_sgt_page(page, sgt_iter, st) {
> +                               struct folio *folio = page_folio(page);
> +                               if (folio == last)
> +                                       continue;
> +                               last = folio;
>                                 put_page(page);
> +                       }
>                         sg_free_table(st);
>                         kfree(st);
> 
> 
> --
> 2.48.0.rc2.279.g1de40edade-goog
> 
> 
> >
> > Or if avoiding the ping-pong actually mattes in the gtt prepare
> > error case, then maybe we should rename this guy into
> > __shmem_sg_free_table() without the mapping_clear_unevictable()
> > and wrap it in a higher level shmem_sg_free_table() that does
> > everything?
> >
> > >       folio_batch_init(&fbatch);
> > >       for_each_sgt_page(page, sgt_iter, st) {
> > >               struct folio *folio = page_folio(page);
> > > @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
> > >       return 0;
> > >  err_sg:
> > >       sg_mark_end(sg);
> > > +     mapping_clear_unevictable(mapping);
> > >       if (sg != st->sgl) {
> > >               shmem_sg_free_table(st, mapping, false, false);
> > >       } else {
> > > -             mapping_clear_unevictable(mapping);
> > >               sg_free_table(st);
> > >       }
> > >
> > > @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> > >       struct address_space *mapping = obj->base.filp->f_mapping;
> > >       unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
> > >       struct sg_table *st;
> > > -     struct sgt_iter sgt_iter;
> > > -     struct page *page;
> > >       int ret;
> > >
> > >       /*
> > > @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> > >                * for PAGE_SIZE chunks instead may be helpful.
> > >                */
> > >               if (max_segment > PAGE_SIZE) {
> > > -                     for_each_sgt_page(page, sgt_iter, st)
> > > -                             put_page(page);
> > > -                     sg_free_table(st);
> > > +                     /* Leave the mapping unevictable while we retry */
> > > +                     shmem_sg_free_table(st, mapping, false, false);
> > >                       kfree(st);
> > >
> > >                       max_segment = PAGE_SIZE;
> > > @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> > >       return 0;
> > >
> > >  err_pages:
> > > +     mapping_clear_unevictable(mapping);
> > >       shmem_sg_free_table(st, mapping, false, false);
> > >       /*
> > >        * shmemfs first checks if there is enough memory to allocate the page
> > > @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
> > >       if (i915_gem_object_needs_bit17_swizzle(obj))
> > >               i915_gem_object_save_bit_17_swizzle(obj, pages);
> > >
> > > +     mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
> > >       shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
> > >                           obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> > >       kfree(pages);
> > > --
> > > 2.48.0.rc2.279.g1de40edade-goog
> >
> > --
> > Ville Syrjälä
> > Intel
Brian Geffon Jan. 16, 2025, 2:44 p.m. UTC | #5
On Thu, Jan 16, 2025 at 9:24 AM Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> > When converting to folios the cleanup path of shmem_get_pages() was
> > missed. When a DMA remap fails and the max segment size is greater than
> > PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> > size. The cleanup code isn't properly using the folio apis and as a
> > result isn't handling compound pages correctly.
> >
> > Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> > Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > Suggested-by: Tomasz Figa <tfiga@google.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
> >  1 file changed, 5 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > index fe69f2c8527d..02ddab5bf5c0 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
> >       struct folio *last = NULL;
> >       struct page *page;
> >
> > -     mapping_clear_unevictable(mapping);
> > -
>
> The assymmetry here between the alloc vs. free is a bit annoying.
> Maybe we can just keep this here?

My thought on this was that freeing the sg table is orthogonal to
making the mapping evictable, and shmem_sg_free_table() can be
simplified to not even take the mapping as a parameter.
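
Roughly (a sketch of that direction only; the eventual v2 may look different):

	void shmem_sg_free_table(struct sg_table *st, bool dirty, bool backup);

with callers pairing it explicitly with the mapping handling, e.g.:

	mapping_clear_unevictable(mapping);
	shmem_sg_free_table(st, false, false);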

>
> Or if avoiding the ping-pong actually mattes in the gtt prepare
> error case, then maybe we should rename this guy into
> __shmem_sg_free_table() without the mapping_clear_unevictable()
> and wrap it in a higher level shmem_sg_free_table() that does
> everything?
>
> >       folio_batch_init(&fbatch);
> >       for_each_sgt_page(page, sgt_iter, st) {
> >               struct folio *folio = page_folio(page);
> > @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
> >       return 0;
> >  err_sg:
> >       sg_mark_end(sg);
> > +     mapping_clear_unevictable(mapping);
> >       if (sg != st->sgl) {
> >               shmem_sg_free_table(st, mapping, false, false);
> >       } else {
> > -             mapping_clear_unevictable(mapping);
> >               sg_free_table(st);
> >       }
> >
> > @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >       struct address_space *mapping = obj->base.filp->f_mapping;
> >       unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
> >       struct sg_table *st;
> > -     struct sgt_iter sgt_iter;
> > -     struct page *page;
> >       int ret;
> >
> >       /*
> > @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >                * for PAGE_SIZE chunks instead may be helpful.
> >                */
> >               if (max_segment > PAGE_SIZE) {
> > -                     for_each_sgt_page(page, sgt_iter, st)
> > -                             put_page(page);
> > -                     sg_free_table(st);
> > +                     /* Leave the mapping unevictable while we retry */
> > +                     shmem_sg_free_table(st, mapping, false, false);
> >                       kfree(st);
> >
> >                       max_segment = PAGE_SIZE;
> > @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >       return 0;
> >
> >  err_pages:
> > +     mapping_clear_unevictable(mapping);
> >       shmem_sg_free_table(st, mapping, false, false);
> >       /*
> >        * shmemfs first checks if there is enough memory to allocate the page
> > @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
> >       if (i915_gem_object_needs_bit17_swizzle(obj))
> >               i915_gem_object_save_bit_17_swizzle(obj, pages);
> >
> > +     mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
> >       shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
> >                           obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> >       kfree(pages);
> > --
> > 2.48.0.rc2.279.g1de40edade-goog
>
> --
> Ville Syrjälä
> Intel
Brian Geffon Jan. 16, 2025, 2:48 p.m. UTC | #6
On Thu, Jan 16, 2025 at 9:38 AM Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Thu, Jan 16, 2025 at 04:24:26PM +0200, Ville Syrjälä wrote:
> > On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> > > When converting to folios the cleanup path of shmem_get_pages() was
> > > missed. When a DMA remap fails and the max segment size is greater than
> > > PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> > > size. The cleanup code isn't properly using the folio apis and as a
> > > result isn't handling compound pages correctly.
> > >
> > > Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> > > Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> > > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > > Suggested-by: Tomasz Figa <tfiga@google.com>
> > > ---
> > >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
> > >  1 file changed, 5 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > > index fe69f2c8527d..02ddab5bf5c0 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > > @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
> > >     struct folio *last = NULL;
> > >     struct page *page;
> > >
> > > -   mapping_clear_unevictable(mapping);
> > > -
> >
> > The assymmetry here between the alloc vs. free is a bit annoying.
> > Maybe we can just keep this here?
>
> Actually, I guess it's a bit more than just annoying since
> shmem_sg_free_table() is actually used from outside i915_gem_shmem.c
> as well.

You're correct, this was a bad oversight on my part. This patch is
going to require a v2 regardless; how do you feel about just changing
shmem_sg_free_table() to not accept a mapping, given that managing the
mapping is really orthogonal to cleaning up the sg table?

>
> >
> > Or if avoiding the ping-pong actually mattes in the gtt prepare
> > error case, then maybe we should rename this guy into
> > __shmem_sg_free_table() without the mapping_clear_unevictable()
> > and wrap it in a higher level shmem_sg_free_table() that does
> > everything?
> >
> > >     folio_batch_init(&fbatch);
> > >     for_each_sgt_page(page, sgt_iter, st) {
> > >             struct folio *folio = page_folio(page);
> > > @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
> > >     return 0;
> > >  err_sg:
> > >     sg_mark_end(sg);
> > > +   mapping_clear_unevictable(mapping);
> > >     if (sg != st->sgl) {
> > >             shmem_sg_free_table(st, mapping, false, false);
> > >     } else {
> > > -           mapping_clear_unevictable(mapping);
> > >             sg_free_table(st);
> > >     }
> > >
> > > @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> > >     struct address_space *mapping = obj->base.filp->f_mapping;
> > >     unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
> > >     struct sg_table *st;
> > > -   struct sgt_iter sgt_iter;
> > > -   struct page *page;
> > >     int ret;
> > >
> > >     /*
> > > @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> > >              * for PAGE_SIZE chunks instead may be helpful.
> > >              */
> > >             if (max_segment > PAGE_SIZE) {
> > > -                   for_each_sgt_page(page, sgt_iter, st)
> > > -                           put_page(page);
> > > -                   sg_free_table(st);
> > > +                   /* Leave the mapping unevictable while we retry */
> > > +                   shmem_sg_free_table(st, mapping, false, false);
> > >                     kfree(st);
> > >
> > >                     max_segment = PAGE_SIZE;
> > > @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> > >     return 0;
> > >
> > >  err_pages:
> > > +   mapping_clear_unevictable(mapping);
> > >     shmem_sg_free_table(st, mapping, false, false);
> > >     /*
> > >      * shmemfs first checks if there is enough memory to allocate the page
> > > @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
> > >     if (i915_gem_object_needs_bit17_swizzle(obj))
> > >             i915_gem_object_save_bit_17_swizzle(obj, pages);
> > >
> > > +   mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
> > >     shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
> > >                         obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> > >     kfree(pages);
> > > --
> > > 2.48.0.rc2.279.g1de40edade-goog
> >
> > --
> > Ville Syrjälä
> > Intel
>
> --
> Ville Syrjälä
> Intel
Ville Syrjala Jan. 16, 2025, 2:51 p.m. UTC | #7
On Thu, Jan 16, 2025 at 04:24:26PM +0200, Ville Syrjälä wrote:
> On Thu, Jan 16, 2025 at 08:56:36AM -0500, Brian Geffon wrote:
> > When converting to folios the cleanup path of shmem_get_pages() was
> > missed. When a DMA remap fails and the max segment size is greater than
> > PAGE_SIZE it will attempt to retry the remap with a PAGE_SIZEd segment
> > size. The cleanup code isn't properly using the folio apis and as a
> > result isn't handling compound pages correctly.
> > 
> > Link: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13487
> > Fixes: 0b62af28f249 ("i915: convert shmem_sg_free_table() to use a folio_batch")
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > Suggested-by: Tomasz Figa <tfiga@google.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 13 +++++--------
> >  1 file changed, 5 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > index fe69f2c8527d..02ddab5bf5c0 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > @@ -37,8 +37,6 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
> >  	struct folio *last = NULL;
> >  	struct page *page;
> >  
> > -	mapping_clear_unevictable(mapping);
> > -
> 
> The assymmetry here between the alloc vs. free is a bit annoying.
> Maybe we can just keep this here? 

Hmm, I guess with the current code that avoids the ping-pong
we (at least theoretically) could leak the mapping_set_unevictable()
if both i915_gem_gtt_prepare_pages() fails and the subsequent
shmem_sg_alloc_table() retry fails early enough.

So looks to me like the ping-pong would be the only 100% correct
approach.
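
Spelled out, the sequence in question is roughly (step ordering is
illustrative, based on the flow described in this thread):

	shmem_sg_alloc_table()          /* marks the mapping unevictable */
	i915_gem_gtt_prepare_pages()    /* fails -> PAGE_SIZE retry */
	/* retry cleanup deliberately leaves the mapping unevictable */
	shmem_sg_alloc_table()          /* retry fails before its own
					 * error path can clear the flag */
	/* -> mapping_clear_unevictable() is never reached */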

> 
> Or if avoiding the ping-pong actually mattes in the gtt prepare
> error case, then maybe we should rename this guy into
> __shmem_sg_free_table() without the mapping_clear_unevictable()
> and wrap it in a higher level shmem_sg_free_table() that does
> everything?
> 
> >  	folio_batch_init(&fbatch);
> >  	for_each_sgt_page(page, sgt_iter, st) {
> >  		struct folio *folio = page_folio(page);
> > @@ -180,10 +178,10 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
> >  	return 0;
> >  err_sg:
> >  	sg_mark_end(sg);
> > +	mapping_clear_unevictable(mapping);
> >  	if (sg != st->sgl) {
> >  		shmem_sg_free_table(st, mapping, false, false);
> >  	} else {
> > -		mapping_clear_unevictable(mapping);
> >  		sg_free_table(st);
> >  	}
> >  
> > @@ -209,8 +207,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >  	struct address_space *mapping = obj->base.filp->f_mapping;
> >  	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
> >  	struct sg_table *st;
> > -	struct sgt_iter sgt_iter;
> > -	struct page *page;
> >  	int ret;
> >  
> >  	/*
> > @@ -239,9 +235,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >  		 * for PAGE_SIZE chunks instead may be helpful.
> >  		 */
> >  		if (max_segment > PAGE_SIZE) {
> > -			for_each_sgt_page(page, sgt_iter, st)
> > -				put_page(page);
> > -			sg_free_table(st);
> > +			/* Leave the mapping unevictable while we retry */
> > +			shmem_sg_free_table(st, mapping, false, false);
> >  			kfree(st);
> >  
> >  			max_segment = PAGE_SIZE;
> > @@ -265,6 +260,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
> >  	return 0;
> >  
> >  err_pages:
> > +	mapping_clear_unevictable(mapping);
> >  	shmem_sg_free_table(st, mapping, false, false);
> >  	/*
> >  	 * shmemfs first checks if there is enough memory to allocate the page
> > @@ -402,6 +398,7 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
> >  	if (i915_gem_object_needs_bit17_swizzle(obj))
> >  		i915_gem_object_save_bit_17_swizzle(obj, pages);
> >  
> > +	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
> >  	shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
> >  			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> >  	kfree(pages);
> > -- 
> > 2.48.0.rc2.279.g1de40edade-goog
> 
> -- 
> Ville Syrjälä
> Intel

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index fe69f2c8527d..02ddab5bf5c0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -37,8 +37,6 @@  void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
 	struct folio *last = NULL;
 	struct page *page;
 
-	mapping_clear_unevictable(mapping);
-
 	folio_batch_init(&fbatch);
 	for_each_sgt_page(page, sgt_iter, st) {
 		struct folio *folio = page_folio(page);
@@ -180,10 +178,10 @@  int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
 	return 0;
 err_sg:
 	sg_mark_end(sg);
+	mapping_clear_unevictable(mapping);
 	if (sg != st->sgl) {
 		shmem_sg_free_table(st, mapping, false, false);
 	} else {
-		mapping_clear_unevictable(mapping);
 		sg_free_table(st);
 	}
 
@@ -209,8 +207,6 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	struct address_space *mapping = obj->base.filp->f_mapping;
 	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
 	struct sg_table *st;
-	struct sgt_iter sgt_iter;
-	struct page *page;
 	int ret;
 
 	/*
@@ -239,9 +235,8 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 		 * for PAGE_SIZE chunks instead may be helpful.
 		 */
 		if (max_segment > PAGE_SIZE) {
-			for_each_sgt_page(page, sgt_iter, st)
-				put_page(page);
-			sg_free_table(st);
+			/* Leave the mapping unevictable while we retry */
+			shmem_sg_free_table(st, mapping, false, false);
 			kfree(st);
 
 			max_segment = PAGE_SIZE;
@@ -265,6 +260,7 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 
 err_pages:
+	mapping_clear_unevictable(mapping);
 	shmem_sg_free_table(st, mapping, false, false);
 	/*
 	 * shmemfs first checks if there is enough memory to allocate the page
@@ -402,6 +398,7 @@  void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj, pages);
 
+	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
 	shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
 			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
 	kfree(pages);