
[v2,1/3] drm/i915: Introduce refcounted sg-tables

Message ID 20211027105211.485125-2-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Series Prepare error capture for asynchronous migration

Commit Message

Thomas Hellström Oct. 27, 2021, 10:52 a.m. UTC
As we start to introduce asynchronous failsafe object migration,
where we update the object state and then submit asynchronous
commands, we need to record what memory resources are actually used
by various parts of the command stream. Initially for three purposes:

1) Error capture.
2) Asynchronous migration error recovery.
3) Asynchronous vma bind.

By the time these happen, the object state may have been updated
to be several migrations ahead, and the object sg-tables discarded.

In order to make it possible to keep sg-tables with memory resource
information for these operations, introduce refcounted sg-tables that
aren't freed until the last user is done with them.
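
For reference, a minimal sketch of the intended lifecycle with the
default ops (the helper names are the ones this patch adds; the "user"
struct is hypothetical and stands for error capture, vma bind etc.):

	struct i915_refct_sgt *rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);

	if (!rsgt)
		return ERR_PTR(-ENOMEM);

	/* Refcount starts at 1; default release frees table and rsgt. */
	i915_refct_sgt_init(rsgt, size);

	/* ... populate rsgt->table with sg_alloc_table() and friends ... */

	/* Each asynchronous user takes its own reference. */
	user->rsgt = i915_refct_sgt_get(rsgt);

	/* The object may migrate on and drop its reference early. */
	i915_refct_sgt_put(rsgt);

	/* The sg-table is only freed here, once the last user is done. */
	i915_refct_sgt_put(user->rsgt);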

The alternative would be to reference information sitting on the
corresponding ttm_resources, which typically have the same lifetime as
these refcounted sg_tables, but that leads to other awkward constructs:
due to the design direction chosen for TTM resource managers, that would
lead to diamond-style inheritance; the LMEM resources may sometimes be
prematurely freed; and finally the subclassed struct ttm_resource would
have to bleed into the asynchronous vma bind code.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   4 +-
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  16 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 188 +++++++++++-------
 drivers/gpu/drm/i915/i915_scatterlist.c       |  62 ++++--
 drivers/gpu/drm/i915/i915_scatterlist.h       |  76 ++++++-
 drivers/gpu/drm/i915/intel_region_ttm.c       |  15 +-
 drivers/gpu/drm/i915/intel_region_ttm.h       |   5 +-
 drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
 9 files changed, 262 insertions(+), 119 deletions(-)

Comments

Matthew Auld Oct. 27, 2021, 6:03 p.m. UTC | #1
On 27/10/2021 11:52, Thomas Hellström wrote:
> As we start to introduce asynchronous failsafe object migration,
> where we update the object state and then submit asynchronous
> commands, we need to record what memory resources are actually used
> by various parts of the command stream. Initially for three purposes:
> 
> 1) Error capture.
> 2) Asynchronous migration error recovery.
> 3) Asynchronous vma bind.
> 
> By the time these happen, the object state may have been updated
> to be several migrations ahead, and the object sg-tables discarded.
> 
> In order to make it possible to keep sg-tables with memory resource
> information for these operations, introduce refcounted sg-tables that
> aren't freed until the last user is done with them.
> 
> The alternative would be to reference information sitting on the
> corresponding ttm_resources, which typically have the same lifetime as
> these refcounted sg_tables, but that leads to other awkward constructs:
> due to the design direction chosen for TTM resource managers, that would
> lead to diamond-style inheritance; the LMEM resources may sometimes be
> prematurely freed; and finally the subclassed struct ttm_resource would
> have to bleed into the asynchronous vma bind code.
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |   4 +-
>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   3 +-
>   drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  16 +-
>   drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 188 +++++++++++-------
>   drivers/gpu/drm/i915/i915_scatterlist.c       |  62 ++++--
>   drivers/gpu/drm/i915/i915_scatterlist.h       |  76 ++++++-
>   drivers/gpu/drm/i915/intel_region_ttm.c       |  15 +-
>   drivers/gpu/drm/i915/intel_region_ttm.h       |   5 +-
>   drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
>   9 files changed, 262 insertions(+), 119 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index a5479ac7a4ad..c5ab364d4311 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -624,8 +624,8 @@ struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
>   				size_t size, struct intel_memory_region *mr,
>   				struct address_space *mapping,
>   				unsigned int max_segment);
> -void shmem_free_st(struct sg_table *st, struct address_space *mapping,
> -		   bool dirty, bool backup);
> +void shmem_free_st_table(struct sg_table *st, struct address_space *mapping,
> +			 bool dirty, bool backup);

s/st_table/sg_table/?

I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?


>   void __shmem_writeback(size_t size, struct address_space *mapping);
>   
>   #ifdef CONFIG_MMU_NOTIFIER
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index a4b69a43b898..604ed5ad77f5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -544,6 +544,7 @@ struct drm_i915_gem_object {
>   		 */
>   		struct list_head region_link;
>   
> +		struct i915_refct_sgt *rsgt;
>   		struct sg_table *pages;
>   		void *mapping;
>   
> @@ -597,7 +598,7 @@ struct drm_i915_gem_object {
>   	} mm;
>   
>   	struct {
> -		struct sg_table *cached_io_st;
> +		struct i915_refct_sgt *cached_io_rsgt;
>   		struct i915_gem_object_page_iter get_io_page;
>   		struct drm_i915_gem_object *backup;
>   		bool created:1;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> index 01f332d8dbde..67c6bee695c7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> @@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec *pvec)
>   	cond_resched();
>   }
>   
> -void shmem_free_st(struct sg_table *st, struct address_space *mapping,
> -		   bool dirty, bool backup)
> +void shmem_free_st_table(struct sg_table *st, struct address_space *mapping,
> +			 bool dirty, bool backup)
>   {
>   	struct sgt_iter sgt_iter;
>   	struct pagevec pvec;
> @@ -49,7 +49,6 @@ void shmem_free_st(struct sg_table *st, struct address_space *mapping,
>   		check_release_pagevec(&pvec);
>   
>   	sg_free_table(st);
> -	kfree(st);
>   }
>   
>   struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
> @@ -171,7 +170,8 @@ struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
>   err_sg:
>   	sg_mark_end(sg);
>   	if (sg != st->sgl) {
> -		shmem_free_st(st, mapping, false, false);
> +		shmem_free_st_table(st, mapping, false, false);
> +		kfree(st);
>   	} else {
>   		mapping_clear_unevictable(mapping);
>   		sg_free_table(st);
> @@ -254,7 +254,8 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
>   	return 0;
>   
>   err_pages:
> -	shmem_free_st(st, mapping, false, false);
> +	shmem_free_st_table(st, mapping, false, false);
> +	kfree(st);
>   	/*
>   	 * shmemfs first checks if there is enough memory to allocate the page
>   	 * and reports ENOSPC should there be insufficient, along with the usual
> @@ -374,8 +375,9 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
>   	if (i915_gem_object_needs_bit17_swizzle(obj))
>   		i915_gem_object_save_bit_17_swizzle(obj, pages);
>   
> -	shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
> -		      obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> +	shmem_free_st_table(pages, file_inode(obj->base.filp)->i_mapping,
> +			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
> +	kfree(pages);
>   	obj->mm.dirty = false;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> index 4fd2edb20dd9..6826e3859e18 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> @@ -34,7 +34,7 @@
>    * struct i915_ttm_tt - TTM page vector with additional private information
>    * @ttm: The base TTM page vector.
>    * @dev: The struct device used for dma mapping and unmapping.
> - * @cached_st: The cached scatter-gather table.
> + * @cached_rsgt: The cached scatter-gather table.
>    * @is_shmem: Set if using shmem.
>    * @filp: The shmem file, if using shmem backend.
>    *
> @@ -47,7 +47,7 @@
>   struct i915_ttm_tt {
>   	struct ttm_tt ttm;
>   	struct device *dev;
> -	struct sg_table *cached_st;
> +	struct i915_refct_sgt cached_rsgt;
>   
>   	bool is_shmem;
>   	struct file *filp;
> @@ -221,14 +221,10 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
>   	if (IS_ERR(st))
>   		return PTR_ERR(st);
>   
> -	err = dma_map_sg_attrs(i915_tt->dev,
> -			       st->sgl, st->nents,
> -			       DMA_BIDIRECTIONAL,
> -			       DMA_ATTR_SKIP_CPU_SYNC);
> -	if (err <= 0) {
> -		err = -EINVAL;
> +	err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
> +			      DMA_ATTR_SKIP_CPU_SYNC);
> +	if (err)
>   		goto err_free_st;
> -	}
>   
>   	i = 0;
>   	for_each_sgt_page(page, sgt_iter, st)
> @@ -237,11 +233,15 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
>   	if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
>   		ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
>   
> -	i915_tt->cached_st = st;
> +	i915_tt->cached_rsgt.table = *st;
> +	kfree(st);

Will it work if the above just operates on the rsgt.table?
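
E.g. something like the below (untested sketch; assumes shmem_alloc_st()
can be taught to fill a caller-provided table instead of allocating one):

	struct sg_table *st = &i915_tt->cached_rsgt.table;
	int err;

	/* Hypothetical variant that fills @st rather than returning one. */
	err = shmem_alloc_st(i915, size, mr, filp->f_mapping,
			     max_segment, st);
	if (err)
		return err;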

> +
>   	return 0;
>   
>   err_free_st:
> -	shmem_free_st(st, filp->f_mapping, false, false);
> +	shmem_free_st_table(st, filp->f_mapping, false, false);
> +	kfree(st);
> +
>   	return err;
>   }
>   
> @@ -249,16 +249,29 @@ static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
>   {
>   	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
>   	bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
> +	struct sg_table *st = &i915_tt->cached_rsgt.table;
>   
> -	dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
> -		     i915_tt->cached_st->nents,
> -		     DMA_BIDIRECTIONAL);
> +	if (st->sgl)

Can we ever hit this? I would have presumed not? Below also.

> +		shmem_free_st_table(st,
> +				    file_inode(i915_tt->filp)->i_mapping,
> +				    backup, backup);
> +}
>   
> -	shmem_free_st(fetch_and_zero(&i915_tt->cached_st),
> -		      file_inode(i915_tt->filp)->i_mapping,
> -		      backup, backup);
> +static void i915_ttm_tt_release(struct kref *ref)
> +{
> +	struct i915_ttm_tt *i915_tt =
> +		container_of(ref, typeof(*i915_tt), cached_rsgt.kref);
> +	struct sg_table *st = &i915_tt->cached_rsgt.table;
> +
> +	GEM_WARN_ON(st->sgl);
> +
> +	kfree(i915_tt);
>   }
>   
> +static const struct i915_refct_sgt_ops tt_rsgt_ops = {
> +	.release = i915_ttm_tt_release
> +};
> +
>   static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
>   					 uint32_t page_flags)
>   {
> @@ -287,6 +300,9 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
>   	if (ret)
>   		goto err_free;
>   
> +	i915_refct_sgt_init_ops(&i915_tt->cached_rsgt, bo->base.size,
> +				&tt_rsgt_ops);
> +
>   	i915_tt->dev = obj->base.dev->dev;
>   
>   	return &i915_tt->ttm;
> @@ -311,17 +327,15 @@ static int i915_ttm_tt_populate(struct ttm_device *bdev,
>   static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
>   {
>   	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
> +	struct sg_table *st = &i915_tt->cached_rsgt.table;
> +
> +	if (st->sgl)
> +		dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
>   
>   	if (i915_tt->is_shmem) {
>   		i915_ttm_tt_shmem_unpopulate(ttm);
>   	} else {
> -		if (i915_tt->cached_st) {
> -			dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
> -					  DMA_BIDIRECTIONAL, 0);
> -			sg_free_table(i915_tt->cached_st);
> -			kfree(i915_tt->cached_st);
> -			i915_tt->cached_st = NULL;
> -		}
> +		sg_free_table(st);
>   		ttm_pool_free(&bdev->pool, ttm);
>   	}
>   }
> @@ -334,7 +348,7 @@ static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
>   		fput(i915_tt->filp);
>   
>   	ttm_tt_fini(ttm);
> -	kfree(i915_tt);
> +	i915_refct_sgt_put(&i915_tt->cached_rsgt);
>   }
>   
>   static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
> @@ -376,12 +390,12 @@ static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
>   	return 0;
>   }
>   
> -static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
> +static void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj)
>   {
>   	struct radix_tree_iter iter;
>   	void __rcu **slot;
>   
> -	if (!obj->ttm.cached_io_st)
> +	if (!obj->ttm.cached_io_rsgt)
>   		return;
>   
>   	rcu_read_lock();
> @@ -389,9 +403,8 @@ static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
>   		radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
>   	rcu_read_unlock();
>   
> -	sg_free_table(obj->ttm.cached_io_st);
> -	kfree(obj->ttm.cached_io_st);
> -	obj->ttm.cached_io_st = NULL;
> +	i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
> +	obj->ttm.cached_io_rsgt = NULL;
>   }
>   
>   static void
> @@ -477,7 +490,7 @@ static int i915_ttm_purge(struct drm_i915_gem_object *obj)
>   	obj->write_domain = 0;
>   	obj->read_domains = 0;
>   	i915_ttm_adjust_gem_after_move(obj);
> -	i915_ttm_free_cached_io_st(obj);
> +	i915_ttm_free_cached_io_rsgt(obj);
>   	obj->mm.madv = __I915_MADV_PURGED;
>   	return 0;
>   }
> @@ -532,7 +545,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
>   	int ret = i915_ttm_move_notify(bo);
>   
>   	GEM_WARN_ON(ret);
> -	GEM_WARN_ON(obj->ttm.cached_io_st);
> +	GEM_WARN_ON(obj->ttm.cached_io_rsgt);
>   	if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
>   		i915_ttm_purge(obj);
>   }
> @@ -543,7 +556,7 @@ static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
>   
>   	if (likely(obj)) {
>   		__i915_gem_object_pages_fini(obj);
> -		i915_ttm_free_cached_io_st(obj);
> +		i915_ttm_free_cached_io_rsgt(obj);
>   	}
>   }
>   
> @@ -563,40 +576,35 @@ i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
>   					  ttm_mem_type - I915_PL_LMEM0);
>   }
>   
> -static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
> +static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
>   {
>   	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
>   	struct sg_table *st;
>   	int ret;
>   
> -	if (i915_tt->cached_st)
> -		return i915_tt->cached_st;
> -
> -	st = kzalloc(sizeof(*st), GFP_KERNEL);
> -	if (!st)
> -		return ERR_PTR(-ENOMEM);
> +	if (i915_tt->cached_rsgt.table.sgl)
> +		return i915_refct_sgt_get(&i915_tt->cached_rsgt);
>   
> +	st = &i915_tt->cached_rsgt.table;
>   	ret = sg_alloc_table_from_pages_segment(st,
>   			ttm->pages, ttm->num_pages,
>   			0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
>   			i915_sg_segment_size(), GFP_KERNEL);
>   	if (ret) {
> -		kfree(st);
> +		st->sgl = NULL;
>   		return ERR_PTR(ret);
>   	}
>   
>   	ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
>   	if (ret) {
>   		sg_free_table(st);
> -		kfree(st);
>   		return ERR_PTR(ret);
>   	}
>   
> -	i915_tt->cached_st = st;
> -	return st;
> +	return i915_refct_sgt_get(&i915_tt->cached_rsgt);
>   }
>   
> -static struct sg_table *
> +static struct i915_refct_sgt *
>   i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
>   			 struct ttm_resource *res)
>   {
> @@ -610,7 +618,21 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
>   	 * the resulting st. Might make sense for GGTT.
>   	 */
>   	GEM_WARN_ON(!cpu_maps_iomem(res));
> -	return intel_region_ttm_resource_to_st(obj->mm.region, res);
> +	if (bo->resource == res) {
> +		if (!obj->ttm.cached_io_rsgt) {
> +			struct i915_refct_sgt *rsgt;
> +
> +			rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
> +								 res);
> +			if (IS_ERR(rsgt))
> +				return rsgt;
> +
> +			obj->ttm.cached_io_rsgt = rsgt;
> +		}
> +		return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
> +	}
> +
> +	return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
>   }
>   
>   static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> @@ -621,10 +643,7 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
>   {
>   	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
>   						     bdev);
> -	struct ttm_resource_manager *src_man =
> -		ttm_manager_type(bo->bdev, bo->resource->mem_type);
>   	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> -	struct sg_table *src_st;
>   	struct i915_request *rq;
>   	struct ttm_tt *src_ttm = bo->ttm;
>   	enum i915_cache_level src_level, dst_level;
> @@ -650,17 +669,22 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
>   		}
>   		intel_engine_pm_put(i915->gt.migrate.context->engine);
>   	} else {
> -		src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
> -			obj->ttm.cached_io_st;
> +		struct i915_refct_sgt *src_rsgt =
> +			i915_ttm_resource_get_st(obj, bo->resource);
> +
> +		if (IS_ERR(src_rsgt))
> +			return PTR_ERR(src_rsgt);
>   
>   		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
>   		intel_engine_pm_get(i915->gt.migrate.context->engine);
>   		ret = intel_context_migrate_copy(i915->gt.migrate.context,
> -						 NULL, src_st->sgl, src_level,
> +						 NULL, src_rsgt->table.sgl,
> +						 src_level,
>   						 gpu_binds_iomem(bo->resource),
>   						 dst_st->sgl, dst_level,
>   						 gpu_binds_iomem(dst_mem),
>   						 &rq);
> +		i915_refct_sgt_put(src_rsgt);
>   		if (!ret && rq) {
>   			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
>   			i915_request_put(rq);
> @@ -674,13 +698,14 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
>   static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
>   			    struct ttm_resource *dst_mem,
>   			    struct ttm_tt *dst_ttm,
> -			    struct sg_table *dst_st,
> +			    struct i915_refct_sgt *dst_rsgt,
>   			    bool allow_accel)
>   {
>   	int ret = -EINVAL;
>   
>   	if (allow_accel)
> -		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
> +		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
> +					  &dst_rsgt->table);
>   	if (ret) {
>   		struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
>   		struct intel_memory_region *dst_reg, *src_reg;
> @@ -697,12 +722,13 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
>   		dst_iter = !cpu_maps_iomem(dst_mem) ?
>   			ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
>   			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
> -						 dst_st, dst_reg->region.start);
> +						 &dst_rsgt->table,
> +						 dst_reg->region.start);
>   
>   		src_iter = !cpu_maps_iomem(bo->resource) ?
>   			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
>   			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
> -						 obj->ttm.cached_io_st,
> +						 &obj->ttm.cached_io_rsgt->table,
>   						 src_reg->region.start);
>   
>   		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
> @@ -718,7 +744,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource_manager *dst_man =
>   		ttm_manager_type(bo->bdev, dst_mem->mem_type);
>   	struct ttm_tt *ttm = bo->ttm;
> -	struct sg_table *dst_st;
> +	struct i915_refct_sgt *dst_rsgt;
>   	bool clear;
>   	int ret;
>   
> @@ -744,22 +770,24 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
>   			return ret;
>   	}
>   
> -	dst_st = i915_ttm_resource_get_st(obj, dst_mem);
> -	if (IS_ERR(dst_st))
> -		return PTR_ERR(dst_st);
> +	dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
> +	if (IS_ERR(dst_rsgt))
> +		return PTR_ERR(dst_rsgt);
>   
>   	clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
>   	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
> -		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
> +		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
>   
>   	ttm_bo_move_sync_cleanup(bo, dst_mem);
>   	i915_ttm_adjust_domains_after_move(obj);
> -	i915_ttm_free_cached_io_st(obj);
> +	i915_ttm_free_cached_io_rsgt(obj);
>   
>   	if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
> -		obj->ttm.cached_io_st = dst_st;
> -		obj->ttm.get_io_page.sg_pos = dst_st->sgl;
> +		obj->ttm.cached_io_rsgt = dst_rsgt;
> +		obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
>   		obj->ttm.get_io_page.sg_idx = 0;
> +	} else {
> +		i915_refct_sgt_put(dst_rsgt);
>   	}
>   
>   	i915_ttm_adjust_lru(obj);
> @@ -825,7 +853,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
>   		.interruptible = true,
>   		.no_wait_gpu = false,
>   	};
> -	struct sg_table *st;
>   	int real_num_busy;
>   	int ret;
>   
> @@ -862,12 +889,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
>   	}
>   
>   	if (!i915_gem_object_has_pages(obj)) {
> -		/* Object either has a page vector or is an iomem object */
> -		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
> -		if (IS_ERR(st))
> -			return PTR_ERR(st);
> +		struct i915_refct_sgt *rsgt =
> +			i915_ttm_resource_get_st(obj, bo->resource);
> +
> +		if (IS_ERR(rsgt))
> +			return PTR_ERR(rsgt);
>   
> -		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
> +		GEM_BUG_ON(obj->mm.rsgt);
> +		obj->mm.rsgt = rsgt;
> +		__i915_gem_object_set_pages(obj, &rsgt->table,
> +					    i915_sg_dma_sizes(rsgt->table.sgl));
>   	}
>   
>   	i915_ttm_adjust_lru(obj);
> @@ -941,6 +972,13 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
>   	 * If the object is not destroyed next, The TTM eviction logic
>   	 * and shrinkers will move it out if needed.
>   	 */
> +
> +	if (obj->mm.rsgt) {
> +		i915_refct_sgt_put(obj->mm.rsgt);
> +		obj->mm.rsgt = NULL;
> +	}

i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt)) ?

> +
> +	i915_ttm_adjust_lru(obj);

Unrelated change, or do I need to look closer?

>   }
>   
>   static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
> @@ -1278,7 +1316,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
>   	struct ttm_operation_ctx ctx = {
>   		.interruptible = intr,
>   	};
> -	struct sg_table *dst_st;
> +	struct i915_refct_sgt *dst_rsgt;
>   	int ret;
>   
>   	assert_object_held(dst);
> @@ -1293,11 +1331,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
>   	if (ret)
>   		return ret;
>   
> -	dst_st = gpu_binds_iomem(dst_bo->resource) ?
> -		dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
> -
> +	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
>   	__i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
> -			dst_st, allow_accel);
> +			dst_rsgt, allow_accel);
> +
> +	i915_refct_sgt_put(dst_rsgt);
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
> index 4a6712dca838..8a510ee5d1ad 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.c
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.c
> @@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st)
>   	return true;
>   }
>   
> +static void i915_refct_sgt_release(struct kref *ref)
> +{
> +	struct i915_refct_sgt *rsgt =
> +		container_of(ref, typeof(*rsgt), kref);
> +
> +	sg_free_table(&rsgt->table);
> +	kfree(rsgt);
> +}
> +
> +static const struct i915_refct_sgt_ops rsgt_ops = {
> +	.release = i915_refct_sgt_release
> +};
> +
> +/**
> + * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
> + * @rsgt: The struct i915_refct_sgt to initialize.
> + * @size: The size of the underlying memory buffer.
> + */
> +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
> +{
> +	i915_refct_sgt_init_ops(rsgt, size, &rsgt_ops);
> +}
> +
>   /**
> - * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node
> + * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct
> + * drm_mm_node
>    * @node: The drm_mm_node.
>    * @region_start: An offset to add to the dma addresses of the sg list.
>    *
> @@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st)
>    * taking a maximum segment length into account, splitting into segments
>    * if necessary.
>    *
> - * Return: A pointer to a kmalloced struct sg_table on success, negative
> + * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
>    * error code cast to an error pointer on failure.
>    */
> -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
> -				      u64 region_start)
> +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
> +					      u64 region_start)
>   {
>   	const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
>   	u64 segment_pages = max_segment >> PAGE_SHIFT;
>   	u64 block_size, offset, prev_end;
> +	struct i915_refct_sgt *rsgt;
>   	struct sg_table *st;
>   	struct scatterlist *sg;
>   
> -	st = kmalloc(sizeof(*st), GFP_KERNEL);
> -	if (!st)
> +	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
> +	if (!rsgt)
>   		return ERR_PTR(-ENOMEM);
>   
> +	i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
> +	st = &rsgt->table;
>   	if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
>   			   GFP_KERNEL)) {
> -		kfree(st);
> +		i915_refct_sgt_put(rsgt);
>   		return ERR_PTR(-ENOMEM);
>   	}
>   
> @@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
>   	sg_mark_end(sg);
>   	i915_sg_trim(st);
>   
> -	return st;
> +	return rsgt;
>   }
>   
>   /**
> - * i915_sg_from_buddy_resource - Create an sg_table from a struct
> + * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct
>    * i915_buddy_block list
>    * @res: The struct i915_ttm_buddy_resource.
>    * @region_start: An offset to add to the dma addresses of the sg list.
> @@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
>    * taking a maximum segment length into account, splitting into segments
>    * if necessary.
>    *
> - * Return: A pointer to a kmalloced struct sg_table on success, negative
> + * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
>    * error code cast to an error pointer on failure.
>    */
> -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
> -					     u64 region_start)
> +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
> +						     u64 region_start)
>   {
>   	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
>   	const u64 size = res->num_pages << PAGE_SHIFT;
> @@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
>   	struct i915_buddy_mm *mm = bman_res->mm;
>   	struct list_head *blocks = &bman_res->blocks;
>   	struct i915_buddy_block *block;
> +	struct i915_refct_sgt *rsgt;
>   	struct scatterlist *sg;
>   	struct sg_table *st;
>   	resource_size_t prev_end;
>   
>   	GEM_BUG_ON(list_empty(blocks));
>   
> -	st = kmalloc(sizeof(*st), GFP_KERNEL);
> -	if (!st)
> +	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
> +	if (!rsgt)
>   		return ERR_PTR(-ENOMEM);
>   
> +	i915_refct_sgt_init(rsgt, size);
> +	st = &rsgt->table;
>   	if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
> -		kfree(st);
> +		i915_refct_sgt_put(rsgt);
>   		return ERR_PTR(-ENOMEM);
>   	}
>   
> @@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
>   	sg_mark_end(sg);
>   	i915_sg_trim(st);
>   
> -	return st;
> +	return rsgt;
>   }
>   
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
> index b8bd5925b03f..321fd4a9f777 100644
> --- a/drivers/gpu/drm/i915/i915_scatterlist.h
> +++ b/drivers/gpu/drm/i915/i915_scatterlist.h
> @@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void)
>   
>   bool i915_sg_trim(struct sg_table *orig_st);
>   
> -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
> -				      u64 region_start);
> +/**
> + * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt
> + */
> +struct i915_refct_sgt_ops {
> +	/**
> +	 * release() - Free the memory of the struct i915_refct_sgt
> +	 * @ref: struct kref that is embedded in the struct i915_refct_sgt
> +	 */
> +	void (*release)(struct kref *ref);
> +};
> +
> +/**
> + * struct i915_refct_sgt - A refcounted scatter-gather table
> + * @kref: struct kref for refcounting
> + * @table: struct sg_table holding the scatter-gather table itself. Note that
> + * @table->sgl = NULL can be used to determine whether a scatter-gather table
> + * is present or not.
> + * @size: The size in bytes of the underlying memory buffer
> + * @ops: The operations structure.
> + */
> +struct i915_refct_sgt {
> +	struct kref kref;
> +	struct sg_table table;
> +	size_t size;
> +	const struct i915_refct_sgt_ops *ops;
> +};
> +
> +/**
> + * i915_refct_sgt_put - Put a refcounted sg-table
> + * @rsgt: The struct i915_refct_sgt to put.
> + */
> +static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
> +{
> +	if (rsgt)
> +		kref_put(&rsgt->kref, rsgt->ops->release);
> +}
> +
> +/**
> + * i915_refct_sgt_get - Get a refcounted sg-table
> + * @rsgt: The struct i915_refct_sgt to get.
> + */
> +static inline struct i915_refct_sgt *
> +i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
> +{
> +	kref_get(&rsgt->kref);
> +	return rsgt;
> +}
> +
> +/**
> + * i915_refct_sgt_init_ops - Initialize a refcounted sg-list with a custom
> + * operations structure
> + * @rsgt: The struct i915_refct_sgt to initialize.
> + * @size: Size in bytes of the underlying memory buffer.
> + * @ops: A customized operations structure in case the refcounted sg-list
> + * is embedded into another structure.
> + */
> +static inline void i915_refct_sgt_init_ops(struct i915_refct_sgt *rsgt,
> +					   size_t size,
> +					   const struct i915_refct_sgt_ops *ops)
> +{
> +	kref_init(&rsgt->kref);
> +	rsgt->table.sgl = NULL;
> +	rsgt->size = size;
> +	rsgt->ops = ops;
> +}

Could maybe make this __i915_refct_sgt_init(), since it inits more than 
just the ops?

> +
> +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
> +
> +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
> +					      u64 region_start);
>   
> -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
> -					     u64 region_start);
> +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
> +						     u64 region_start);
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
> index 98c7339bf8ba..2e901a27e259 100644
> --- a/drivers/gpu/drm/i915/intel_region_ttm.c
> +++ b/drivers/gpu/drm/i915/intel_region_ttm.c
> @@ -115,8 +115,8 @@ void intel_region_ttm_fini(struct intel_memory_region *mem)
>   }
>   
>   /**
> - * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource
> - * to an sg_table.
> + * intel_region_ttm_resource_to_rsgt -
> + * Convert an opaque TTM resource manager resource to a refcounted sg_table.
>    * @mem: The memory region.
>    * @res: The resource manager resource obtained from the TTM resource manager.
>    *
> @@ -126,17 +126,18 @@ void intel_region_ttm_fini(struct intel_memory_region *mem)
>    *
>    * Return: A malloced sg_table on success, an error pointer on failure.
>    */
> -struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
> -						 struct ttm_resource *res)
> +struct i915_refct_sgt *
> +intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
> +				  struct ttm_resource *res)
>   {
>   	if (mem->is_range_manager) {
>   		struct ttm_range_mgr_node *range_node =
>   			to_ttm_range_mgr_node(res);
>   
> -		return i915_sg_from_mm_node(&range_node->mm_nodes[0],
> -					    mem->region.start);
> +		return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
> +					      mem->region.start);
>   	} else {
> -		return i915_sg_from_buddy_resource(res, mem->region.start);
> +		return i915_rsgt_from_buddy_resource(res, mem->region.start);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h
> index 6f44075920f2..7bbe2b46b504 100644
> --- a/drivers/gpu/drm/i915/intel_region_ttm.h
> +++ b/drivers/gpu/drm/i915/intel_region_ttm.h
> @@ -22,8 +22,9 @@ int intel_region_ttm_init(struct intel_memory_region *mem);
>   
>   void intel_region_ttm_fini(struct intel_memory_region *mem);
>   
> -struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
> -						 struct ttm_resource *res);
> +struct i915_refct_sgt *
> +intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
> +				  struct ttm_resource *res);
>   
>   void intel_region_ttm_resource_free(struct intel_memory_region *mem,
>   				    struct ttm_resource *res);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
> index 75793008c4ef..7ec5037eaa58 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_region.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_region.c
> @@ -15,9 +15,9 @@
>   static void mock_region_put_pages(struct drm_i915_gem_object *obj,
>   				  struct sg_table *pages)
>   {
> +	i915_refct_sgt_put(obj->mm.rsgt);
> +	obj->mm.rsgt = NULL;
>   	intel_region_ttm_resource_free(obj->mm.region, obj->mm.res);
> -	sg_free_table(pages);
> -	kfree(pages);
>   }
>   
>   static int mock_region_get_pages(struct drm_i915_gem_object *obj)
> @@ -36,12 +36,14 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj)
>   	if (IS_ERR(obj->mm.res))
>   		return PTR_ERR(obj->mm.res);
>   
> -	pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res);
> -	if (IS_ERR(pages)) {
> -		err = PTR_ERR(pages);
> +	obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
> +							 obj->mm.res);
> +	if (IS_ERR(obj->mm.rsgt)) {
> +		err = PTR_ERR(obj->mm.rsgt);
>   		goto err_free_resource;
>   	}
>   
> +	pages = &obj->mm.rsgt->table;
>   	__i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl));
>   
>   	return 0;
>
Thomas Hellström Oct. 28, 2021, 7:04 a.m. UTC | #2
On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
> On 27/10/2021 11:52, Thomas Hellström wrote:
> > As we start to introduce asynchronous failsafe object migration,
> > where we update the object state and then submit asynchronous
> > commands, we need to record what memory resources are actually used
> > by various parts of the command stream. Initially for three
> > purposes:
> > 
> > 1) Error capture.
> > 2) Asynchronous migration error recovery.
> > 3) Asynchronous vma bind.
> > 
> > By the time these happen, the object state may have been updated
> > to be several migrations ahead, and the object sg-tables discarded.
> > 
> > In order to make it possible to keep sg-tables with memory resource
> > information for these operations, introduce refcounted sg-tables that
> > aren't freed until the last user is done with them.
> > 
> > The alternative would be to reference information sitting on the
> > corresponding ttm_resources, which typically have the same lifetime
> > as these refcounted sg_tables, but that leads to other awkward
> > constructs: due to the design direction chosen for TTM resource
> > managers, that would lead to diamond-style inheritance; the LMEM
> > resources may sometimes be prematurely freed; and finally the
> > subclassed struct ttm_resource would have to bleed into the
> > asynchronous vma bind code.
> > 
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_object.h    |   4 +-
> >   .../gpu/drm/i915/gem/i915_gem_object_types.h  |   3 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  16 +-
> >   drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 188 +++++++++++--
> > -----
> >   drivers/gpu/drm/i915/i915_scatterlist.c       |  62 ++++--
> >   drivers/gpu/drm/i915/i915_scatterlist.h       |  76 ++++++-
> >   drivers/gpu/drm/i915/intel_region_ttm.c       |  15 +-
> >   drivers/gpu/drm/i915/intel_region_ttm.h       |   5 +-
> >   drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
> >   9 files changed, 262 insertions(+), 119 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > index a5479ac7a4ad..c5ab364d4311 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> > @@ -624,8 +624,8 @@ struct sg_table *shmem_alloc_st(struct
> > drm_i915_private *i915,
> >                                 size_t size, struct
> > intel_memory_region *mr,
> >                                 struct address_space *mapping,
> >                                 unsigned int max_segment);
> > -void shmem_free_st(struct sg_table *st, struct address_space
> > *mapping,
> > -                  bool dirty, bool backup);
> > +void shmem_free_st_table(struct sg_table *st, struct address_space
> > *mapping,
> > +                        bool dirty, bool backup);
> 
> s/st_table/sg_table/?
> 
> I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?

Yes, the naming is indeed a bit unfortunate here. To be consistent with
the core's sg_free_table(), I changed this to
shmem_sg_free_table() / shmem_sg_alloc_table().
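
I.e. roughly like this (a sketch of the intended declarations; the alloc
variant now fills a caller-provided table, so the separate kfree() of the
table goes away):

int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
			 size_t size, struct intel_memory_region *mr,
			 struct address_space *mapping,
			 unsigned int max_segment);
void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
			 bool dirty, bool backup);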

> 
> 
> >   void __shmem_writeback(size_t size, struct address_space
> > *mapping);
> >   
> >   #ifdef CONFIG_MMU_NOTIFIER
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > index a4b69a43b898..604ed5ad77f5 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> > @@ -544,6 +544,7 @@ struct drm_i915_gem_object {
> >                  */
> >                 struct list_head region_link;
> >   
> > +               struct i915_refct_sgt *rsgt;
> >                 struct sg_table *pages;
> >                 void *mapping;
> >   
> > @@ -597,7 +598,7 @@ struct drm_i915_gem_object {
> >         } mm;
> >   
> >         struct {
> > -               struct sg_table *cached_io_st;
> > +               struct i915_refct_sgt *cached_io_rsgt;
> >                 struct i915_gem_object_page_iter get_io_page;
> >                 struct drm_i915_gem_object *backup;
> >                 bool created:1;
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > index 01f332d8dbde..67c6bee695c7 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
> > @@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec
> > *pvec)
> >         cond_resched();
> >   }
> >   
> > -void shmem_free_st(struct sg_table *st, struct address_space
> > *mapping,
> > -                  bool dirty, bool backup)
> > +void shmem_free_st_table(struct sg_table *st, struct address_space
> > *mapping,
> > +                        bool dirty, bool backup)
> >   {
> >         struct sgt_iter sgt_iter;
> >         struct pagevec pvec;
> > @@ -49,7 +49,6 @@ void shmem_free_st(struct sg_table *st, struct
> > address_space *mapping,
> >                 check_release_pagevec(&pvec);
> >   
> >         sg_free_table(st);
> > -       kfree(st);
> >   }
> >   
> >   struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
> > @@ -171,7 +170,8 @@ struct sg_table *shmem_alloc_st(struct
> > drm_i915_private *i915,
> >   err_sg:
> >         sg_mark_end(sg);
> >         if (sg != st->sgl) {
> > -               shmem_free_st(st, mapping, false, false);
> > +               shmem_free_st_table(st, mapping, false, false);
> > +               kfree(st);
> >         } else {
> >                 mapping_clear_unevictable(mapping);
> >                 sg_free_table(st);
> > @@ -254,7 +254,8 @@ static int shmem_get_pages(struct
> > drm_i915_gem_object *obj)
> >         return 0;
> >   
> >   err_pages:
> > -       shmem_free_st(st, mapping, false, false);
> > +       shmem_free_st_table(st, mapping, false, false);
> > +       kfree(st);
> >         /*
> >          * shmemfs first checks if there is enough memory to
> > allocate the page
> >          * and reports ENOSPC should there be insufficient, along
> > with the usual
> > @@ -374,8 +375,9 @@ void i915_gem_object_put_pages_shmem(struct
> > drm_i915_gem_object *obj, struct sg_
> >         if (i915_gem_object_needs_bit17_swizzle(obj))
> >                 i915_gem_object_save_bit_17_swizzle(obj, pages);
> >   
> > -       shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
> > -                     obj->mm.dirty, obj->mm.madv ==
> > I915_MADV_WILLNEED);
> > +       shmem_free_st_table(pages, file_inode(obj->base.filp)-
> > >i_mapping,
> > +                           obj->mm.dirty, obj->mm.madv ==
> > I915_MADV_WILLNEED);
> > +       kfree(pages);
> >         obj->mm.dirty = false;
> >   }
> >   
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > index 4fd2edb20dd9..6826e3859e18 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> > @@ -34,7 +34,7 @@
> >    * struct i915_ttm_tt - TTM page vector with additional private
> > information
> >    * @ttm: The base TTM page vector.
> >    * @dev: The struct device used for dma mapping and unmapping.
> > - * @cached_st: The cached scatter-gather table.
> > + * @cached_rsgt: The cached scatter-gather table.
> >    * @is_shmem: Set if using shmem.
> >    * @filp: The shmem file, if using shmem backend.
> >    *
> > @@ -47,7 +47,7 @@
> >   struct i915_ttm_tt {
> >         struct ttm_tt ttm;
> >         struct device *dev;
> > -       struct sg_table *cached_st;
> > +       struct i915_refct_sgt cached_rsgt;
> >   
> >         bool is_shmem;
> >         struct file *filp;
> > @@ -221,14 +221,10 @@ static int i915_ttm_tt_shmem_populate(struct
> > ttm_device *bdev,
> >         if (IS_ERR(st))
> >                 return PTR_ERR(st);
> >   
> > -       err = dma_map_sg_attrs(i915_tt->dev,
> > -                              st->sgl, st->nents,
> > -                              DMA_BIDIRECTIONAL,
> > -                              DMA_ATTR_SKIP_CPU_SYNC);
> > -       if (err <= 0) {
> > -               err = -EINVAL;
> > +       err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
> > +                             DMA_ATTR_SKIP_CPU_SYNC);
> > +       if (err)
> >                 goto err_free_st;
> > -       }
> >   
> >         i = 0;
> >         for_each_sgt_page(page, sgt_iter, st)
> > @@ -237,11 +233,15 @@ static int i915_ttm_tt_shmem_populate(struct
> > ttm_device *bdev,
> >         if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
> >                 ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
> >   
> > -       i915_tt->cached_st = st;
> > +       i915_tt->cached_rsgt.table = *st;
> > +       kfree(st);
> 
> Will it work if the above just operates on the rsgt.table?

I'll change the shmem utility here to not allocate the struct sg_table
and then we can operate on it directly.
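
Roughly (sketch of the populate path after that change, reusing the
shmem_sg_alloc_table() variant sketched above), so that nothing needs to
be copied into or kfree()d from the cached rsgt:

	struct sg_table *st = &i915_tt->cached_rsgt.table;

	err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping,
				   max_segment);
	if (err)
		return err;

	err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
			      DMA_ATTR_SKIP_CPU_SYNC);
	if (err)
		goto err_free_st;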

> 
> > +
> >         return 0;
> >   
> >   err_free_st:
> > -       shmem_free_st(st, filp->f_mapping, false, false);
> > +       shmem_free_st_table(st, filp->f_mapping, false, false);
> > +       kfree(st);
> > +
> >         return err;
> >   }
> >   
> > @@ -249,16 +249,29 @@ static void
> > i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
> >   {
> >         struct i915_ttm_tt *i915_tt = container_of(ttm,
> > typeof(*i915_tt), ttm);
> >         bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
> > +       struct sg_table *st = &i915_tt->cached_rsgt.table;
> >   
> > -       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
> > -                    i915_tt->cached_st->nents,
> > -                    DMA_BIDIRECTIONAL);
> > +       if (st->sgl)
> 
> Can we ever hit this? I would have presumed not? Below also.

Yes, here's where we typically free the scatterlist.

> 
> > +               shmem_free_st_table(st,
> > +                                   file_inode(i915_tt->filp)-
> > >i_mapping,
> > +                                   backup, backup);
> > +}
> >   
> > -       shmem_free_st(fetch_and_zero(&i915_tt->cached_st),
> > -                     file_inode(i915_tt->filp)->i_mapping,
> > -                     backup, backup);
> > +static void i915_ttm_tt_release(struct kref *ref)
> > +{
> > +       struct i915_ttm_tt *i915_tt =
> > +               container_of(ref, typeof(*i915_tt),
> > cached_rsgt.kref);
> > +       struct sg_table *st = &i915_tt->cached_rsgt.table;
> > +
> > +       GEM_WARN_ON(st->sgl);
> > +
> > +       kfree(i915_tt);
> >   }
> >   
> > +static const struct i915_refct_sgt_ops tt_rsgt_ops = {
> > +       .release = i915_ttm_tt_release
> > +};
> > +
> >   static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object
> > *bo,
> >                                          uint32_t page_flags)
> >   {
> > @@ -287,6 +300,9 @@ static struct ttm_tt *i915_ttm_tt_create(struct
> > ttm_buffer_object *bo,
> >         if (ret)
> >                 goto err_free;
> >   
> > +       i915_refct_sgt_init_ops(&i915_tt->cached_rsgt, bo-
> > >base.size,
> > +                               &tt_rsgt_ops);
> > +
> >         i915_tt->dev = obj->base.dev->dev;
> >   
> >         return &i915_tt->ttm;
> > @@ -311,17 +327,15 @@ static int i915_ttm_tt_populate(struct
> > ttm_device *bdev,
> >   static void i915_ttm_tt_unpopulate(struct ttm_device *bdev,
> > struct ttm_tt *ttm)
> >   {
> >         struct i915_ttm_tt *i915_tt = container_of(ttm,
> > typeof(*i915_tt), ttm);
> > +       struct sg_table *st = &i915_tt->cached_rsgt.table;
> > +
> > +       if (st->sgl)
> > +               dma_unmap_sgtable(i915_tt->dev, st,
> > DMA_BIDIRECTIONAL, 0);
> >   
> >         if (i915_tt->is_shmem) {
> >                 i915_ttm_tt_shmem_unpopulate(ttm);
> >         } else {
> > -               if (i915_tt->cached_st) {
> > -                       dma_unmap_sgtable(i915_tt->dev, i915_tt-
> > >cached_st,
> > -                                         DMA_BIDIRECTIONAL, 0);
> > -                       sg_free_table(i915_tt->cached_st);
> > -                       kfree(i915_tt->cached_st);
> > -                       i915_tt->cached_st = NULL;
> > -               }
> > +               sg_free_table(st);
> >                 ttm_pool_free(&bdev->pool, ttm);
> >         }
> >   }
> > @@ -334,7 +348,7 @@ static void i915_ttm_tt_destroy(struct
> > ttm_device *bdev, struct ttm_tt *ttm)
> >                 fput(i915_tt->filp);
> >   
> >         ttm_tt_fini(ttm);
> > -       kfree(i915_tt);
> > +       i915_refct_sgt_put(&i915_tt->cached_rsgt);
> >   }
> >   
> >   static bool i915_ttm_eviction_valuable(struct ttm_buffer_object
> > *bo,
> > @@ -376,12 +390,12 @@ static int i915_ttm_move_notify(struct
> > ttm_buffer_object *bo)
> >         return 0;
> >   }
> >   
> > -static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object
> > *obj)
> > +static void i915_ttm_free_cached_io_rsgt(struct
> > drm_i915_gem_object *obj)
> >   {
> >         struct radix_tree_iter iter;
> >         void __rcu **slot;
> >   
> > -       if (!obj->ttm.cached_io_st)
> > +       if (!obj->ttm.cached_io_rsgt)
> >                 return;
> >   
> >         rcu_read_lock();
> > @@ -389,9 +403,8 @@ static void i915_ttm_free_cached_io_st(struct
> > drm_i915_gem_object *obj)
> >                 radix_tree_delete(&obj->ttm.get_io_page.radix,
> > iter.index);
> >         rcu_read_unlock();
> >   
> > -       sg_free_table(obj->ttm.cached_io_st);
> > -       kfree(obj->ttm.cached_io_st);
> > -       obj->ttm.cached_io_st = NULL;
> > +       i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
> > +       obj->ttm.cached_io_rsgt = NULL;
> >   }
> >   
> >   static void
> > @@ -477,7 +490,7 @@ static int i915_ttm_purge(struct
> > drm_i915_gem_object *obj)
> >         obj->write_domain = 0;
> >         obj->read_domains = 0;
> >         i915_ttm_adjust_gem_after_move(obj);
> > -       i915_ttm_free_cached_io_st(obj);
> > +       i915_ttm_free_cached_io_rsgt(obj);
> >         obj->mm.madv = __I915_MADV_PURGED;
> >         return 0;
> >   }
> > @@ -532,7 +545,7 @@ static void i915_ttm_swap_notify(struct
> > ttm_buffer_object *bo)
> >         int ret = i915_ttm_move_notify(bo);
> >   
> >         GEM_WARN_ON(ret);
> > -       GEM_WARN_ON(obj->ttm.cached_io_st);
> > +       GEM_WARN_ON(obj->ttm.cached_io_rsgt);
> >         if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
> >                 i915_ttm_purge(obj);
> >   }
> > @@ -543,7 +556,7 @@ static void i915_ttm_delete_mem_notify(struct
> > ttm_buffer_object *bo)
> >   
> >         if (likely(obj)) {
> >                 __i915_gem_object_pages_fini(obj);
> > -               i915_ttm_free_cached_io_st(obj);
> > +               i915_ttm_free_cached_io_rsgt(obj);
> >         }
> >   }
> >   
> > @@ -563,40 +576,35 @@ i915_ttm_region(struct ttm_device *bdev, int
> > ttm_mem_type)
> >                                           ttm_mem_type -
> > I915_PL_LMEM0);
> >   }
> >   
> > -static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
> > +static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt
> > *ttm)
> >   {
> >         struct i915_ttm_tt *i915_tt = container_of(ttm,
> > typeof(*i915_tt), ttm);
> >         struct sg_table *st;
> >         int ret;
> >   
> > -       if (i915_tt->cached_st)
> > -               return i915_tt->cached_st;
> > -
> > -       st = kzalloc(sizeof(*st), GFP_KERNEL);
> > -       if (!st)
> > -               return ERR_PTR(-ENOMEM);
> > +       if (i915_tt->cached_rsgt.table.sgl)
> > +               return i915_refct_sgt_get(&i915_tt->cached_rsgt);
> >   
> > +       st = &i915_tt->cached_rsgt.table;
> >         ret = sg_alloc_table_from_pages_segment(st,
> >                         ttm->pages, ttm->num_pages,
> >                         0, (unsigned long)ttm->num_pages <<
> > PAGE_SHIFT,
> >                         i915_sg_segment_size(), GFP_KERNEL);
> >         if (ret) {
> > -               kfree(st);
> > +               st->sgl = NULL;
> >                 return ERR_PTR(ret);
> >         }
> >   
> >         ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
> > 0);
> >         if (ret) {
> >                 sg_free_table(st);
> > -               kfree(st);
> >                 return ERR_PTR(ret);
> >         }
> >   
> > -       i915_tt->cached_st = st;
> > -       return st;
> > +       return i915_refct_sgt_get(&i915_tt->cached_rsgt);
> >   }
> >   
> > -static struct sg_table *
> > +static struct i915_refct_sgt *
> >   i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
> >                          struct ttm_resource *res)
> >   {
> > @@ -610,7 +618,21 @@ i915_ttm_resource_get_st(struct
> > drm_i915_gem_object *obj,
> >          * the resulting st. Might make sense for GGTT.
> >          */
> >         GEM_WARN_ON(!cpu_maps_iomem(res));
> > -       return intel_region_ttm_resource_to_st(obj->mm.region,
> > res);
> > +       if (bo->resource == res) {
> > +               if (!obj->ttm.cached_io_rsgt) {
> > +                       struct i915_refct_sgt *rsgt;
> > +
> > +                       rsgt =
> > intel_region_ttm_resource_to_rsgt(obj->mm.region,
> > +                                                               
> > res);
> > +                       if (IS_ERR(rsgt))
> > +                               return rsgt;
> > +
> > +                       obj->ttm.cached_io_rsgt = rsgt;
> > +               }
> > +               return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
> > +       }
> > +
> > +       return intel_region_ttm_resource_to_rsgt(obj->mm.region,
> > res);
> >   }
> >   
> >   static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> > @@ -621,10 +643,7 @@ static int i915_ttm_accel_move(struct
> > ttm_buffer_object *bo,
> >   {
> >         struct drm_i915_private *i915 = container_of(bo->bdev,
> > typeof(*i915),
> >                                                      bdev);
> > -       struct ttm_resource_manager *src_man =
> > -               ttm_manager_type(bo->bdev, bo->resource->mem_type);
> >         struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> > -       struct sg_table *src_st;
> >         struct i915_request *rq;
> >         struct ttm_tt *src_ttm = bo->ttm;
> >         enum i915_cache_level src_level, dst_level;
> > @@ -650,17 +669,22 @@ static int i915_ttm_accel_move(struct
> > ttm_buffer_object *bo,
> >                 }
> >                 intel_engine_pm_put(i915->gt.migrate.context-
> > >engine);
> >         } else {
> > -               src_st = src_man->use_tt ?
> > i915_ttm_tt_get_st(src_ttm) :
> > -                       obj->ttm.cached_io_st;
> > +               struct i915_refct_sgt *src_rsgt =
> > +                       i915_ttm_resource_get_st(obj, bo-
> > >resource);
> > +
> > +               if (IS_ERR(src_rsgt))
> > +                       return PTR_ERR(src_rsgt);
> >   
> >                 src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
> >                 intel_engine_pm_get(i915->gt.migrate.context->engine);
> >                 ret = intel_context_migrate_copy(i915->gt.migrate.context,
> > -                                                NULL, src_st->sgl, src_level,
> > +                                                NULL, src_rsgt->table.sgl,
> > +                                                src_level,
> >                                                  gpu_binds_iomem(bo->resource),
> >                                                  dst_st->sgl, dst_level,
> >                                                  gpu_binds_iomem(dst_mem),
> >                                                  &rq);
> > +               i915_refct_sgt_put(src_rsgt);
> >                 if (!ret && rq) {
> >                         i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
> >                         i915_request_put(rq);
> > @@ -674,13 +698,14 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
> >   static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
> >                             struct ttm_resource *dst_mem,
> >                             struct ttm_tt *dst_ttm,
> > -                           struct sg_table *dst_st,
> > +                           struct i915_refct_sgt *dst_rsgt,
> >                             bool allow_accel)
> >   {
> >         int ret = -EINVAL;
> >   
> >         if (allow_accel)
> > -               ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
> > +               ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
> > +                                         &dst_rsgt->table);
> >         if (ret) {
> >                 struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> >                 struct intel_memory_region *dst_reg, *src_reg;
> > @@ -697,12 +722,13 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
> >                 dst_iter = !cpu_maps_iomem(dst_mem) ?
> >                         ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
> >                         ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
> > -                                                dst_st, dst_reg->region.start);
> > +                                                &dst_rsgt->table,
> > +                                                dst_reg->region.start);
> >   
> >                 src_iter = !cpu_maps_iomem(bo->resource) ?
> >                         ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
> >                         ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
> > -                                                obj->ttm.cached_io_st,
> > +                                                &obj->ttm.cached_io_rsgt->table,
> >                                                  src_reg->region.start);
> >   
> >                 ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
> > @@ -718,7 +744,7 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> >         struct ttm_resource_manager *dst_man =
> >                 ttm_manager_type(bo->bdev, dst_mem->mem_type);
> >         struct ttm_tt *ttm = bo->ttm;
> > -       struct sg_table *dst_st;
> > +       struct i915_refct_sgt *dst_rsgt;
> >         bool clear;
> >         int ret;
> >   
> > @@ -744,22 +770,24 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
> >                         return ret;
> >         }
> >   
> > -       dst_st = i915_ttm_resource_get_st(obj, dst_mem);
> > -       if (IS_ERR(dst_st))
> > -               return PTR_ERR(dst_st);
> > +       dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
> > +       if (IS_ERR(dst_rsgt))
> > +               return PTR_ERR(dst_rsgt);
> >   
> >         clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
> >         if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
> > -               __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
> > +               __i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
> >   
> >         ttm_bo_move_sync_cleanup(bo, dst_mem);
> >         i915_ttm_adjust_domains_after_move(obj);
> > -       i915_ttm_free_cached_io_st(obj);
> > +       i915_ttm_free_cached_io_rsgt(obj);
> >   
> >         if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
> > -               obj->ttm.cached_io_st = dst_st;
> > -               obj->ttm.get_io_page.sg_pos = dst_st->sgl;
> > +               obj->ttm.cached_io_rsgt = dst_rsgt;
> > +               obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
> >                 obj->ttm.get_io_page.sg_idx = 0;
> > +       } else {
> > +               i915_refct_sgt_put(dst_rsgt);
> >         }
> >   
> >         i915_ttm_adjust_lru(obj);
> > @@ -825,7 +853,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
> >                 .interruptible = true,
> >                 .no_wait_gpu = false,
> >         };
> > -       struct sg_table *st;
> >         int real_num_busy;
> >         int ret;
> >   
> > @@ -862,12 +889,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
> >         }
> >   
> >         if (!i915_gem_object_has_pages(obj)) {
> > -               /* Object either has a page vector or is an iomem object */
> > -               st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
> > -               if (IS_ERR(st))
> > -                       return PTR_ERR(st);
> > +               struct i915_refct_sgt *rsgt =
> > +                       i915_ttm_resource_get_st(obj, bo->resource);
> > +
> > +               if (IS_ERR(rsgt))
> > +                       return PTR_ERR(rsgt);
> >   
> > -               __i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
> > +               GEM_BUG_ON(obj->mm.rsgt);
> > +               obj->mm.rsgt = rsgt;
> > +               __i915_gem_object_set_pages(obj, &rsgt->table,
> > +                                           i915_sg_dma_sizes(rsgt->table.sgl));
> >         }
> >   
> >         i915_ttm_adjust_lru(obj);
> > @@ -941,6 +972,13 @@ static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
> >          * If the object is not destroyed next, The TTM eviction logic
> >          * and shrinkers will move it out if needed.
> >          */
> > +
> > +       if (obj->mm.rsgt) {
> > +               i915_refct_sgt_put(obj->mm.rsgt);
> > +               obj->mm.rsgt = NULL;
> > +       }
> 
> i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt)) ?
> 
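
For reference, that form would fold the put-and-clear into a single
statement, a sketch assuming i915's fetch_and_zero() helper (which returns
the old value and writes NULL back) and the NULL check inside
i915_refct_sgt_put():

        /* drop the reference, if any, and clear the pointer */
        i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt));
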
> > +
> > +       i915_ttm_adjust_lru(obj);
> 
> Unrelated change, or do I need to look closer?

No that's unrelated. Probably a rebase mistake. Will remove.

> 
> >   }
> >   
> >   static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
> > @@ -1278,7 +1316,7 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
> >         struct ttm_operation_ctx ctx = {
> >                 .interruptible = intr,
> >         };
> > -       struct sg_table *dst_st;
> > +       struct i915_refct_sgt *dst_rsgt;
> >         int ret;
> >   
> >         assert_object_held(dst);
> > @@ -1293,11 +1331,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
> >         if (ret)
> >                 return ret;
> >   
> > -       dst_st = gpu_binds_iomem(dst_bo->resource) ?
> > -               dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
> > -
> > +       dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
> >         __i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
> > -                       dst_st, allow_accel);
> > +                       dst_rsgt, allow_accel);
> > +
> > +       i915_refct_sgt_put(dst_rsgt);
> >   
> >         return 0;
> >   }
> > diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
> > index 4a6712dca838..8a510ee5d1ad 100644
> > --- a/drivers/gpu/drm/i915/i915_scatterlist.c
> > +++ b/drivers/gpu/drm/i915/i915_scatterlist.c
> > @@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st)
> >         return true;
> >   }
> >   
> > +static void i915_refct_sgt_release(struct kref *ref)
> > +{
> > +       struct i915_refct_sgt *rsgt =
> > +               container_of(ref, typeof(*rsgt), kref);
> > +
> > +       sg_free_table(&rsgt->table);
> > +       kfree(rsgt);
> > +}
> > +
> > +static const struct i915_refct_sgt_ops rsgt_ops = {
> > +       .release = i915_refct_sgt_release
> > +};
> > +
> > +/**
> > + * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
> > + * @rsgt: The struct i915_refct_sgt to initialize.
> > + * @size: The size of the underlying memory buffer.
> > + */
> > +void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
> > +{
> > +       i915_refct_sgt_init_ops(rsgt, size, &rsgt_ops);
> > +}
> > +
> >   /**
> > - * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node
> > + * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct
> > + * drm_mm_node
> >    * @node: The drm_mm_node.
> >    * @region_start: An offset to add to the dma addresses of the sg list.
> >    *
> > @@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st)
> >    * taking a maximum segment length into account, splitting into segments
> >    * if necessary.
> >    *
> > - * Return: A pointer to a kmalloced struct sg_table on success, negative
> > + * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
> >    * error code cast to an error pointer on failure.
> >    */
> > -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
> > -                                     u64 region_start)
> > +struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
> > +                                             u64 region_start)
> >   {
> >         const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
> >         u64 segment_pages = max_segment >> PAGE_SHIFT;
> >         u64 block_size, offset, prev_end;
> > +       struct i915_refct_sgt *rsgt;
> >         struct sg_table *st;
> >         struct scatterlist *sg;
> >   
> > -       st = kmalloc(sizeof(*st), GFP_KERNEL);
> > -       if (!st)
> > +       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
> > +       if (!rsgt)
> >                 return ERR_PTR(-ENOMEM);
> >   
> > +       i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
> > +       st = &rsgt->table;
> >         if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
> >                            GFP_KERNEL)) {
> > -               kfree(st);
> > +               i915_refct_sgt_put(rsgt);
> >                 return ERR_PTR(-ENOMEM);
> >         }
> >   
> > @@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
> >         sg_mark_end(sg);
> >         i915_sg_trim(st);
> >   
> > -       return st;
> > +       return rsgt;
> >   }
> >   
> >   /**
> > - * i915_sg_from_buddy_resource - Create an sg_table from a struct
> > + * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct
> >    * i915_buddy_block list
> >    * @res: The struct i915_ttm_buddy_resource.
> >    * @region_start: An offset to add to the dma addresses of the sg list.
> > @@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
> >    * taking a maximum segment length into account, splitting into segments
> >    * if necessary.
> >    *
> > - * Return: A pointer to a kmalloced struct sg_table on success, negative
> > + * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
> >    * error code cast to an error pointer on failure.
> >    */
> > -struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
> > -                                            u64 region_start)
> > +struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
> > +                                                    u64 region_start)
> >   {
> >         struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
> >         const u64 size = res->num_pages << PAGE_SHIFT;
> > @@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
> >         struct i915_buddy_mm *mm = bman_res->mm;
> >         struct list_head *blocks = &bman_res->blocks;
> >         struct i915_buddy_block *block;
> > +       struct i915_refct_sgt *rsgt;
> >         struct scatterlist *sg;
> >         struct sg_table *st;
> >         resource_size_t prev_end;
> >   
> >         GEM_BUG_ON(list_empty(blocks));
> >   
> > -       st = kmalloc(sizeof(*st), GFP_KERNEL);
> > -       if (!st)
> > +       rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
> > +       if (!rsgt)
> >                 return ERR_PTR(-ENOMEM);
> >   
> > +       i915_refct_sgt_init(rsgt, size);
> > +       st = &rsgt->table;
> >         if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
> > -               kfree(st);
> > +               i915_refct_sgt_put(rsgt);
> >                 return ERR_PTR(-ENOMEM);
> >         }
> >   
> > @@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
> >         sg_mark_end(sg);
> >         i915_sg_trim(st);
> >   
> > -       return st;
> > +       return rsgt;
> >   }
> >   
> >   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
> > index b8bd5925b03f..321fd4a9f777 100644
> > --- a/drivers/gpu/drm/i915/i915_scatterlist.h
> > +++ b/drivers/gpu/drm/i915/i915_scatterlist.h
> > @@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void)
> >   
> >   bool i915_sg_trim(struct sg_table *orig_st);
> >   
> > -struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
> > -                                     u64 region_start);
> > +/**
> > + * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt
> > + */
> > +struct i915_refct_sgt_ops {
> > +       /**
> > +        * release() - Free the memory of the struct i915_refct_sgt
> > +        * @ref: struct kref that is embedded in the struct i915_refct_sgt
> > +        */
> > +       void (*release)(struct kref *ref);
> > +};
> > +
> > +/**
> > + * struct i915_refct_sgt - A refcounted scatter-gather table
> > + * @kref: struct kref for refcounting
> > + * @table: struct sg_table holding the scatter-gather table itself. Note that
> > + * @table->sgl = NULL can be used to determine whether a scatter-gather table
> > + * is present or not.
> > + * @size: The size in bytes of the underlying memory buffer
> > + * @ops: The operations structure.
> > + */
> > +struct i915_refct_sgt {
> > +       struct kref kref;
> > +       struct sg_table table;
> > +       size_t size;
> > +       const struct i915_refct_sgt_ops *ops;
> > +};
> > +
> > +/**
> > + * i915_refct_sgt_put - Put a refcounted sg-table
> > + * @rsgt: the struct i915_refct_sgt to put.
> > + */
> > +static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
> > +{
> > +       if (rsgt)
> > +               kref_put(&rsgt->kref, rsgt->ops->release);
> > +}
> > +
> > +/**
> > + * i915_refct_sgt_get - Get a refcounted sg-table
> > + * @rsgt: the struct i915_refct_sgt to get.
> > + */
> > +static inline struct i915_refct_sgt *
> > +i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
> > +{
> > +       kref_get(&rsgt->kref);
> > +       return rsgt;
> > +}
> > +
> > +/**
> > + * i915_refct_sgt_init_ops - Initialize a refcounted sg-list with a custom
> > + * operations structure
> > + * @rsgt: The struct i915_refct_sgt to initialize.
> > + * @size: Size in bytes of the underlying memory buffer.
> > + * @ops: A customized operations structure in case the refcounted sg-list
> > + * is embedded into another structure.
> > + */
> > +static inline void i915_refct_sgt_init_ops(struct i915_refct_sgt *rsgt,
> > +                                          size_t size,
> > +                                          const struct i915_refct_sgt_ops *ops)
> > +{
> > +       kref_init(&rsgt->kref);
> > +       rsgt->table.sgl = NULL;
> > +       rsgt->size = size;
> > +       rsgt->ops = ops;
> > +}
> 
> Could maybe make this __i915_refct_sgt_init(), since it inits more
> than 
> just the ops?
> 
> 

Will change.
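
I.e. something like this (sketch only; names per the suggestion, bodies
unchanged from the v2 helpers above):

        static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
                                                 size_t size,
                                                 const struct i915_refct_sgt_ops *ops)
        {
                kref_init(&rsgt->kref);
                rsgt->table.sgl = NULL; /* empty-table marker, as documented above */
                rsgt->size = size;
                rsgt->ops = ops;
        }

        /* ...with the default-ops variant in i915_scatterlist.c becoming: */
        void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
        {
                __i915_refct_sgt_init(rsgt, size, &rsgt_ops);
        }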

Thanks,
Thomas
Matthew Auld Oct. 28, 2021, 8:47 a.m. UTC | #3
On 28/10/2021 08:04, Thomas Hellström wrote:
> On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
>> On 27/10/2021 11:52, Thomas Hellström wrote:
>>> [snip: commit message and diffstat, quoted in full in the messages above]
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>> index a5479ac7a4ad..c5ab364d4311 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>> @@ -624,8 +624,8 @@ struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
>>>                                  size_t size, struct intel_memory_region *mr,
>>>                                  struct address_space *mapping,
>>>                                  unsigned int max_segment);
>>> -void shmem_free_st(struct sg_table *st, struct address_space *mapping,
>>> -                  bool dirty, bool backup);
>>> +void shmem_free_st_table(struct sg_table *st, struct address_space *mapping,
>>> +                        bool dirty, bool backup);
>>
>> s/st_table/sg_table/?
>>
>> I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?
> 
> Yes the naming is indeed a bit unfortunate here. To be consistent with
> the core's sg_free_table(), I changed to
> shmem_sg_free_table() / shmem_sg_alloc_table.
> 
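
Roughly like below (a sketch of the renamed declarations only; since the
alloc side will also stop allocating the struct sg_table itself, the final
signatures may still differ):

        void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
                                 bool dirty, bool backup);
        int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
                                 size_t size, struct intel_memory_region *mr,
                                 struct address_space *mapping,
                                 unsigned int max_segment);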
>>
>>
>>>    void __shmem_writeback(size_t size, struct address_space
>>> *mapping);
>>>    
>>>    #ifdef CONFIG_MMU_NOTIFIER
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> index a4b69a43b898..604ed5ad77f5 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>> @@ -544,6 +544,7 @@ struct drm_i915_gem_object {
>>>                   */
>>>                  struct list_head region_link;
>>>    
>>> +               struct i915_refct_sgt *rsgt;
>>>                  struct sg_table *pages;
>>>                  void *mapping;
>>>    
>>> @@ -597,7 +598,7 @@ struct drm_i915_gem_object {
>>>          } mm;
>>>    
>>>          struct {
>>> -               struct sg_table *cached_io_st;
>>> +               struct i915_refct_sgt *cached_io_rsgt;
>>>                  struct i915_gem_object_page_iter get_io_page;
>>>                  struct drm_i915_gem_object *backup;
>>>                  bool created:1;
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>>> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>>> index 01f332d8dbde..67c6bee695c7 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>>> @@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec
>>> *pvec)
>>>          cond_resched();
>>>    }
>>>    
>>> -void shmem_free_st(struct sg_table *st, struct address_space
>>> *mapping,
>>> -                  bool dirty, bool backup)
>>> +void shmem_free_st_table(struct sg_table *st, struct address_space
>>> *mapping,
>>> +                        bool dirty, bool backup)
>>>    {
>>>          struct sgt_iter sgt_iter;
>>>          struct pagevec pvec;
>>> @@ -49,7 +49,6 @@ void shmem_free_st(struct sg_table *st, struct
>>> address_space *mapping,
>>>                  check_release_pagevec(&pvec);
>>>    
>>>          sg_free_table(st);
>>> -       kfree(st);
>>>    }
>>>    
>>>    struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
>>> @@ -171,7 +170,8 @@ struct sg_table *shmem_alloc_st(struct
>>> drm_i915_private *i915,
>>>    err_sg:
>>>          sg_mark_end(sg);
>>>          if (sg != st->sgl) {
>>> -               shmem_free_st(st, mapping, false, false);
>>> +               shmem_free_st_table(st, mapping, false, false);
>>> +               kfree(st);
>>>          } else {
>>>                  mapping_clear_unevictable(mapping);
>>>                  sg_free_table(st);
>>> @@ -254,7 +254,8 @@ static int shmem_get_pages(struct
>>> drm_i915_gem_object *obj)
>>>          return 0;
>>>    
>>>    err_pages:
>>> -       shmem_free_st(st, mapping, false, false);
>>> +       shmem_free_st_table(st, mapping, false, false);
>>> +       kfree(st);
>>>          /*
>>>           * shmemfs first checks if there is enough memory to
>>> allocate the page
>>>           * and reports ENOSPC should there be insufficient, along
>>> with the usual
>>> @@ -374,8 +375,9 @@ void i915_gem_object_put_pages_shmem(struct
>>> drm_i915_gem_object *obj, struct sg_
>>>          if (i915_gem_object_needs_bit17_swizzle(obj))
>>>                  i915_gem_object_save_bit_17_swizzle(obj, pages);
>>>    
>>> -       shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
>>> -                     obj->mm.dirty, obj->mm.madv ==
>>> I915_MADV_WILLNEED);
>>> +       shmem_free_st_table(pages, file_inode(obj->base.filp)-
>>>> i_mapping,
>>> +                           obj->mm.dirty, obj->mm.madv ==
>>> I915_MADV_WILLNEED);
>>> +       kfree(pages);
>>>          obj->mm.dirty = false;
>>>    }
>>>    
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>>> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>>> index 4fd2edb20dd9..6826e3859e18 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
>>> @@ -34,7 +34,7 @@
>>>     * struct i915_ttm_tt - TTM page vector with additional private
>>> information
>>>     * @ttm: The base TTM page vector.
>>>     * @dev: The struct device used for dma mapping and unmapping.
>>> - * @cached_st: The cached scatter-gather table.
>>> + * @cached_rsgt: The cached scatter-gather table.
>>>     * @is_shmem: Set if using shmem.
>>>     * @filp: The shmem file, if using shmem backend.
>>>     *
>>> @@ -47,7 +47,7 @@
>>>    struct i915_ttm_tt {
>>>          struct ttm_tt ttm;
>>>          struct device *dev;
>>> -       struct sg_table *cached_st;
>>> +       struct i915_refct_sgt cached_rsgt;
>>>    
>>>          bool is_shmem;
>>>          struct file *filp;
>>> @@ -221,14 +221,10 @@ static int i915_ttm_tt_shmem_populate(struct
>>> ttm_device *bdev,
>>>          if (IS_ERR(st))
>>>                  return PTR_ERR(st);
>>>    
>>> -       err = dma_map_sg_attrs(i915_tt->dev,
>>> -                              st->sgl, st->nents,
>>> -                              DMA_BIDIRECTIONAL,
>>> -                              DMA_ATTR_SKIP_CPU_SYNC);
>>> -       if (err <= 0) {
>>> -               err = -EINVAL;
>>> +       err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
>>> +                             DMA_ATTR_SKIP_CPU_SYNC);
>>> +       if (err)
>>>                  goto err_free_st;
>>> -       }
>>>    
>>>          i = 0;
>>>          for_each_sgt_page(page, sgt_iter, st)
>>> @@ -237,11 +233,15 @@ static int i915_ttm_tt_shmem_populate(struct
>>> ttm_device *bdev,
>>>          if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
>>>                  ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
>>>    
>>> -       i915_tt->cached_st = st;
>>> +       i915_tt->cached_rsgt.table = *st;
>>> +       kfree(st);
>>
>> Will it work if the above just operates on the rsgt.table?
> 
> I'll change the shmem utility here to not allocate the struct sg_table
> and then we can operate on it directly.
> 
>>
>>> +
>>>          return 0;
>>>    
>>>    err_free_st:
>>> -       shmem_free_st(st, filp->f_mapping, false, false);
>>> +       shmem_free_st_table(st, filp->f_mapping, false, false);
>>> +       kfree(st);
>>> +
>>>          return err;
>>>    }
>>>    
>>> @@ -249,16 +249,29 @@ static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
>>>    {
>>>          struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
>>>          bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
>>> +       struct sg_table *st = &i915_tt->cached_rsgt.table;
>>>    
>>> -       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
>>> -                    i915_tt->cached_st->nents,
>>> -                    DMA_BIDIRECTIONAL);
>>> +       if (st->sgl)
>>
>> Can we ever hit this? I would have presumed not? Below also.
> 
> Yes, here's where we typically free the scatterlist.

What I meant to say: can the above ever not be true, i.e. sgl == NULL?

> [snip: remainder of the quoted patch and review, unchanged from the exchange above]
Thomas Hellström Oct. 28, 2021, 9:35 a.m. UTC | #4
On 10/28/21 10:47, Matthew Auld wrote:
> On 28/10/2021 08:04, Thomas Hellström wrote:
>> On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
>>> On 27/10/2021 11:52, Thomas Hellström wrote:
>>>> As we start to introduce asynchronous failsafe object migration,
>>>> where we update the object state and then submit asynchronous
>>>> commands we need to record what memory resources are actually used
>>>> by various part of the command stream. Initially for three
>>>> purposes:
>>>>
>>>> 1) Error capture.
>>>> 2) Asynchronous migration error recovery.
>>>> 3) Asynchronous vma bind.
>>>>
>>>> At the time where these happens, the object state may have been
>>>> updated
>>>> to be several migrations ahead and object sg-tables discarded.
>>>>
>>>> In order to make it possible to keep sg-tables with memory resource
>>>> information for these operations, introduce refcounted sg-tables
>>>> that
>>>> aren't freed until the last user is done with them.
>>>>
>>>> The alternative would be to reference information sitting on the
>>>> corresponding ttm_resources which typically have the same lifetime
>>>> as
>>>> these refcountes sg_tables, but that leads to other awkward
>>>> constructs:
>>>> Due to the design direction chosen for ttm resource managers that
>>>> would
>>>> lead to diamond-style inheritance, the LMEM resources may sometimes
>>>> be
>>>> prematurely freed, and finally the subclassed struct ttm_resource
>>>> would
>>>> have to bleed into the asynchronous vma bind code.
>>>>
>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/gem/i915_gem_object.h    |   4 +-
>>>>    .../gpu/drm/i915/gem/i915_gem_object_types.h  |   3 +-
>>>>    drivers/gpu/drm/i915/gem/i915_gem_shmem.c     |  16 +-
>>>>    drivers/gpu/drm/i915/gem/i915_gem_ttm.c       | 188 +++++++++++--
>>>> -----
>>>>    drivers/gpu/drm/i915/i915_scatterlist.c       |  62 ++++--
>>>>    drivers/gpu/drm/i915/i915_scatterlist.h       |  76 ++++++-
>>>>    drivers/gpu/drm/i915/intel_region_ttm.c       |  15 +-
>>>>    drivers/gpu/drm/i915/intel_region_ttm.h       |   5 +-
>>>>    drivers/gpu/drm/i915/selftests/mock_region.c  |  12 +-
>>>>    9 files changed, 262 insertions(+), 119 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>>> b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>>> index a5479ac7a4ad..c5ab364d4311 100644
>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>>>> @@ -624,8 +624,8 @@ struct sg_table *shmem_alloc_st(struct
>>>> drm_i915_private *i915,
>>>>                                  size_t size, struct
>>>> intel_memory_region *mr,
>>>>                                  struct address_space *mapping,
>>>>                                  unsigned int max_segment);
>>>> -void shmem_free_st(struct sg_table *st, struct address_space
>>>> *mapping,
>>>> -                  bool dirty, bool backup);
>>>> +void shmem_free_st_table(struct sg_table *st, struct address_space
>>>> *mapping,
>>>> +                        bool dirty, bool backup);
>>>
>>> s/st_table/sg_table/?
>>>
>>> I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?
>>
>> Yes the naming is indeed a bit unfortunate here. To be consistent with
>> the core's sg_free_table(), I changed to
>> shmem_sg_free_table() / shmem_sg_alloc_table.
>>
>>>
>>>
>>>>    void __shmem_writeback(size_t size, struct address_space
>>>> *mapping);
>>>>       #ifdef CONFIG_MMU_NOTIFIER
>>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>>> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>>> index a4b69a43b898..604ed5ad77f5 100644
>>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>>>> @@ -544,6 +544,7 @@ struct drm_i915_gem_object {
>>>>                   */
>>>>                  struct list_head region_link;
>>>>    +               struct i915_refct_sgt *rsgt;
>>>>                  struct sg_table *pages;
>>>>                  void *mapping;
>>>> [snip]
>>>> @@ -237,11 +233,15 @@ static int i915_ttm_tt_shmem_populate(struct
>>>> ttm_device *bdev,
>>>>          if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
>>>>                  ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
>>>>    -       i915_tt->cached_st = st;
>>>> +       i915_tt->cached_rsgt.table = *st;
>>>> +       kfree(st);
>>>
>>> Will it work if the above just operates on the rsgt.table?
>>
>> I'll change the shmem utility here to not allocate the struct sg_table
>> and then we can operate on it directly.
>>
>>>
>>>> [snip]
>>>>    @@ -249,16 +249,29 @@ static void
>>>> i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
>>>>    {
>>>>          struct i915_ttm_tt *i915_tt = container_of(ttm,
>>>> typeof(*i915_tt), ttm);
>>>>          bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
>>>> +       struct sg_table *st = &i915_tt->cached_rsgt.table;
>>>>    -       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
>>>> -                    i915_tt->cached_st->nents,
>>>> -                    DMA_BIDIRECTIONAL);
>>>> +       if (st->sgl)
>>>
>>> Can we ever hit this? I would have presumed not? Below also.
>>
>> Yes, here's where we typically free the scatterlist.
>
> What I meant to say: can the above ever not be true, i.e., can sgl == NULL?

Hm, yes, I think it can. If we're populating a non-shmem ttm_tt, that
sg-list is, IIRC, allocated lazily on first use. Although I haven't
analyzed in detail whether a tt can actually be populated with its
sg-list still not lazily allocated under the same lock.
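
For reference, a sketch of the two points above. The shmem_sg_alloc_table()
shape is a guess at the planned v3 change (the helper fills a caller-provided
table instead of kmallocing one) and is hypothetical here; the unpopulate
path is lifted from the patch below and shows why teardown checks st->sgl:

/*
 * Hypothetical v3 shape of the shmem helper: the caller owns the
 * struct sg_table (e.g. &i915_tt->cached_rsgt.table), so the helper
 * no longer needs to kmalloc or kfree it.
 */
int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
			 size_t size, struct intel_memory_region *mr,
			 struct address_space *mapping,
			 unsigned int max_segment);

/*
 * From the patch below: since the non-shmem sg-list is only built
 * lazily on first use, teardown must tolerate table.sgl == NULL.
 */
static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
{
	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
	struct sg_table *st = &i915_tt->cached_rsgt.table;

	if (st->sgl)
		dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);

	if (i915_tt->is_shmem) {
		i915_ttm_tt_shmem_unpopulate(ttm);
	} else {
		sg_free_table(st);
		ttm_pool_free(&bdev->pool, ttm);
	}
}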
Matthew Auld Oct. 28, 2021, 9:58 a.m. UTC | #5
On 28/10/2021 10:35, Thomas Hellström wrote:
> 
> On 10/28/21 10:47, Matthew Auld wrote:
>> On 28/10/2021 08:04, Thomas Hellström wrote:
>>> On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
>>>> On 27/10/2021 11:52, Thomas Hellström wrote:
>>>>> [snip]
>>>>> -void shmem_free_st(struct sg_table *st, struct address_space
>>>>> *mapping,
>>>>> -                  bool dirty, bool backup);
>>>>> +void shmem_free_st_table(struct sg_table *st, struct address_space
>>>>> *mapping,
>>>>> +                        bool dirty, bool backup);
>>>>
>>>> s/st_table/sg_table/?
>>>>
>>>> I thought st was shorthand for sg_table? Maybe shmem_sg_free_table?
>>>
>>> Yes the naming is indeed a bit unfortunate here. To be consistent with
>>> the core's sg_free_table(), I changed to
>>> shmem_sg_free_table() / shmem_sg_alloc_table.
>>>
>>>>
>>>>
>>>>> [snip]
>>>>>    -       i915_tt->cached_st = st;
>>>>> +       i915_tt->cached_rsgt.table = *st;
>>>>> +       kfree(st);
>>>>
>>>> Will it work if the above just operates on the rsgt.table?
>>>
>>> I'll change the shmem utility here to not allocate the struct sg_table
>>> and then we can operate on it directly.
>>>
>>>>
>>>>> [snip]
>>>>>    @@ -249,16 +249,29 @@ static void
>>>>> i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
>>>>>    {
>>>>>          struct i915_ttm_tt *i915_tt = container_of(ttm,
>>>>> typeof(*i915_tt), ttm);
>>>>>          bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
>>>>> +       struct sg_table *st = &i915_tt->cached_rsgt.table;
>>>>>    -       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
>>>>> -                    i915_tt->cached_st->nents,
>>>>> -                    DMA_BIDIRECTIONAL);
>>>>> +       if (st->sgl)
>>>>
>>>> Can we ever hit this? I would have presumed not? Below also.
>>>
>>> Yes, here's where we typically free the scatterlist.
>>
>> What I meant to say: can the above ever not be true, i.e., can sgl == NULL?
> 
> Hm, yes, I think it can. If we're populating a non-shmem ttm_tt, that
> sg-list is, IIRC, allocated lazily on first use. Although I haven't
> analyzed in detail whether a tt can actually be populated with its
> sg-list still not lazily allocated under the same lock.

Oh right. I guess we could maybe drop the check in the shmem part?
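
A sketch of what that might look like (hypothetical, no v3 is quoted here;
Thomas agrees to the change in #6 below). With unpopulate only ever called
on a populated shmem tt, the sgl check can simply go:

static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
{
	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
	bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;

	/* shmem_sg_free_table() is the renamed helper discussed above. */
	shmem_sg_free_table(&i915_tt->cached_rsgt.table,
			    file_inode(i915_tt->filp)->i_mapping,
			    backup, backup);
}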

Thomas Hellström Oct. 28, 2021, 11:20 a.m. UTC | #6
On 10/28/21 11:58, Matthew Auld wrote:
> On 28/10/2021 10:35, Thomas Hellström wrote:
>>
>> On 10/28/21 10:47, Matthew Auld wrote:
>>> On 28/10/2021 08:04, Thomas Hellström wrote:
>>>> On Wed, 2021-10-27 at 19:03 +0100, Matthew Auld wrote:
>>>>> On 27/10/2021 11:52, Thomas Hellström wrote:
>>>>>> [snip]
>>>>>>    @@ -249,16 +249,29 @@ static void
>>>>>> i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
>>>>>>    {
>>>>>>          struct i915_ttm_tt *i915_tt = container_of(ttm,
>>>>>> typeof(*i915_tt), ttm);
>>>>>>          bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
>>>>>> +       struct sg_table *st = &i915_tt->cached_rsgt.table;
>>>>>>    -       dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
>>>>>> -                    i915_tt->cached_st->nents,
>>>>>> -                    DMA_BIDIRECTIONAL);
>>>>>> +       if (st->sgl)
>>>>>
>>>>> Can we ever hit this? I would have presumed not? Below also.
>>>>
>>>> Yes, here's where we typically free the scatterlist.
>>>
>>> What I meant to say: can the above ever not be true, i.e., can sgl == NULL?
>>
>> Hm, yes, I think it can. If we're populating a non-shmem ttm_tt, that
>> sg-list is, IIRC, allocated lazily on first use. Although I haven't
>> analyzed in detail whether a tt can actually be populated with its
>> sg-list still not lazily allocated under the same lock.
>
> Oh right. I guess we could maybe drop the check in the shmem part?


Ah yes, that should be safe. Will do that. Thanks.

/Thomas
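
As background for the patch below, a minimal usage sketch of the refcounted
sg-table API it introduces. The async_capture_* names are hypothetical; they
stand in for the error capture and asynchronous bind users this series is
preparing for:

/*
 * Grab a reference to the object's current sg-table before submitting
 * asynchronous work, so the table stays valid even if the object
 * migrates on and drops its own reference. Assumes the object's pages
 * are pinned, so obj->mm.rsgt is set (see __i915_ttm_get_pages()).
 */
static struct i915_refct_sgt *async_capture_begin(struct drm_i915_gem_object *obj)
{
	return i915_refct_sgt_get(obj->mm.rsgt);
}

static void async_capture_end(struct i915_refct_sgt *rsgt)
{
	/* May drop the final reference, via rsgt->ops->release(). */
	i915_refct_sgt_put(rsgt);
}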



Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index a5479ac7a4ad..c5ab364d4311 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -624,8 +624,8 @@  struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
 				size_t size, struct intel_memory_region *mr,
 				struct address_space *mapping,
 				unsigned int max_segment);
-void shmem_free_st(struct sg_table *st, struct address_space *mapping,
-		   bool dirty, bool backup);
+void shmem_free_st_table(struct sg_table *st, struct address_space *mapping,
+			 bool dirty, bool backup);
 void __shmem_writeback(size_t size, struct address_space *mapping);
 
 #ifdef CONFIG_MMU_NOTIFIER
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index a4b69a43b898..604ed5ad77f5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -544,6 +544,7 @@  struct drm_i915_gem_object {
 		 */
 		struct list_head region_link;
 
+		struct i915_refct_sgt *rsgt;
 		struct sg_table *pages;
 		void *mapping;
 
@@ -597,7 +598,7 @@  struct drm_i915_gem_object {
 	} mm;
 
 	struct {
-		struct sg_table *cached_io_st;
+		struct i915_refct_sgt *cached_io_rsgt;
 		struct i915_gem_object_page_iter get_io_page;
 		struct drm_i915_gem_object *backup;
 		bool created:1;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 01f332d8dbde..67c6bee695c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@  static void check_release_pagevec(struct pagevec *pvec)
 	cond_resched();
 }
 
-void shmem_free_st(struct sg_table *st, struct address_space *mapping,
-		   bool dirty, bool backup)
+void shmem_free_st_table(struct sg_table *st, struct address_space *mapping,
+			 bool dirty, bool backup)
 {
 	struct sgt_iter sgt_iter;
 	struct pagevec pvec;
@@ -49,7 +49,6 @@  void shmem_free_st(struct sg_table *st, struct address_space *mapping,
 		check_release_pagevec(&pvec);
 
 	sg_free_table(st);
-	kfree(st);
 }
 
 struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
@@ -171,7 +170,8 @@  struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
 err_sg:
 	sg_mark_end(sg);
 	if (sg != st->sgl) {
-		shmem_free_st(st, mapping, false, false);
+		shmem_free_st_table(st, mapping, false, false);
+		kfree(st);
 	} else {
 		mapping_clear_unevictable(mapping);
 		sg_free_table(st);
@@ -254,7 +254,8 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 
 err_pages:
-	shmem_free_st(st, mapping, false, false);
+	shmem_free_st_table(st, mapping, false, false);
+	kfree(st);
 	/*
 	 * shmemfs first checks if there is enough memory to allocate the page
 	 * and reports ENOSPC should there be insufficient, along with the usual
@@ -374,8 +375,9 @@  void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj, pages);
 
-	shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
-		      obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
+	shmem_free_st_table(pages, file_inode(obj->base.filp)->i_mapping,
+			    obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
+	kfree(pages);
 	obj->mm.dirty = false;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 4fd2edb20dd9..6826e3859e18 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -34,7 +34,7 @@ 
  * struct i915_ttm_tt - TTM page vector with additional private information
  * @ttm: The base TTM page vector.
  * @dev: The struct device used for dma mapping and unmapping.
- * @cached_st: The cached scatter-gather table.
+ * @cached_rsgt: The cached scatter-gather table.
  * @is_shmem: Set if using shmem.
  * @filp: The shmem file, if using shmem backend.
  *
@@ -47,7 +47,7 @@ 
 struct i915_ttm_tt {
 	struct ttm_tt ttm;
 	struct device *dev;
-	struct sg_table *cached_st;
+	struct i915_refct_sgt cached_rsgt;
 
 	bool is_shmem;
 	struct file *filp;
@@ -221,14 +221,10 @@  static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
 	if (IS_ERR(st))
 		return PTR_ERR(st);
 
-	err = dma_map_sg_attrs(i915_tt->dev,
-			       st->sgl, st->nents,
-			       DMA_BIDIRECTIONAL,
-			       DMA_ATTR_SKIP_CPU_SYNC);
-	if (err <= 0) {
-		err = -EINVAL;
+	err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+			      DMA_ATTR_SKIP_CPU_SYNC);
+	if (err)
 		goto err_free_st;
-	}
 
 	i = 0;
 	for_each_sgt_page(page, sgt_iter, st)
@@ -237,11 +233,15 @@  static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
 	if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
 		ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
 
-	i915_tt->cached_st = st;
+	i915_tt->cached_rsgt.table = *st;
+	kfree(st);
+
 	return 0;
 
 err_free_st:
-	shmem_free_st(st, filp->f_mapping, false, false);
+	shmem_free_st_table(st, filp->f_mapping, false, false);
+	kfree(st);
+
 	return err;
 }
 
@@ -249,16 +249,29 @@  static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
 {
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
 	bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+	struct sg_table *st = &i915_tt->cached_rsgt.table;
 
-	dma_unmap_sg(i915_tt->dev, i915_tt->cached_st->sgl,
-		     i915_tt->cached_st->nents,
-		     DMA_BIDIRECTIONAL);
+	if (st->sgl)
+		shmem_free_st_table(st,
+				    file_inode(i915_tt->filp)->i_mapping,
+				    backup, backup);
+}
 
-	shmem_free_st(fetch_and_zero(&i915_tt->cached_st),
-		      file_inode(i915_tt->filp)->i_mapping,
-		      backup, backup);
+static void i915_ttm_tt_release(struct kref *ref)
+{
+	struct i915_ttm_tt *i915_tt =
+		container_of(ref, typeof(*i915_tt), cached_rsgt.kref);
+	struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+	GEM_WARN_ON(st->sgl);
+
+	kfree(i915_tt);
 }
 
+static const struct i915_refct_sgt_ops tt_rsgt_ops = {
+	.release = i915_ttm_tt_release
+};
+
 static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 					 uint32_t page_flags)
 {
@@ -287,6 +300,9 @@  static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 	if (ret)
 		goto err_free;
 
+	i915_refct_sgt_init_ops(&i915_tt->cached_rsgt, bo->base.size,
+				&tt_rsgt_ops);
+
 	i915_tt->dev = obj->base.dev->dev;
 
 	return &i915_tt->ttm;
@@ -311,17 +327,15 @@  static int i915_ttm_tt_populate(struct ttm_device *bdev,
 static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
 {
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+	struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+	if (st->sgl)
+		dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
 
 	if (i915_tt->is_shmem) {
 		i915_ttm_tt_shmem_unpopulate(ttm);
 	} else {
-		if (i915_tt->cached_st) {
-			dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
-					  DMA_BIDIRECTIONAL, 0);
-			sg_free_table(i915_tt->cached_st);
-			kfree(i915_tt->cached_st);
-			i915_tt->cached_st = NULL;
-		}
+		sg_free_table(st);
 		ttm_pool_free(&bdev->pool, ttm);
 	}
 }
@@ -334,7 +348,7 @@  static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
 		fput(i915_tt->filp);
 
 	ttm_tt_fini(ttm);
-	kfree(i915_tt);
+	i915_refct_sgt_put(&i915_tt->cached_rsgt);
 }
 
 static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
@@ -376,12 +390,12 @@  static int i915_ttm_move_notify(struct ttm_buffer_object *bo)
 	return 0;
 }
 
-static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
+static void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
 
-	if (!obj->ttm.cached_io_st)
+	if (!obj->ttm.cached_io_rsgt)
 		return;
 
 	rcu_read_lock();
@@ -389,9 +403,8 @@  static void i915_ttm_free_cached_io_st(struct drm_i915_gem_object *obj)
 		radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
 	rcu_read_unlock();
 
-	sg_free_table(obj->ttm.cached_io_st);
-	kfree(obj->ttm.cached_io_st);
-	obj->ttm.cached_io_st = NULL;
+	i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
+	obj->ttm.cached_io_rsgt = NULL;
 }
 
 static void
@@ -477,7 +490,7 @@  static int i915_ttm_purge(struct drm_i915_gem_object *obj)
 	obj->write_domain = 0;
 	obj->read_domains = 0;
 	i915_ttm_adjust_gem_after_move(obj);
-	i915_ttm_free_cached_io_st(obj);
+	i915_ttm_free_cached_io_rsgt(obj);
 	obj->mm.madv = __I915_MADV_PURGED;
 	return 0;
 }
@@ -532,7 +545,7 @@  static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
 	int ret = i915_ttm_move_notify(bo);
 
 	GEM_WARN_ON(ret);
-	GEM_WARN_ON(obj->ttm.cached_io_st);
+	GEM_WARN_ON(obj->ttm.cached_io_rsgt);
 	if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
 		i915_ttm_purge(obj);
 }
@@ -543,7 +556,7 @@  static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 
 	if (likely(obj)) {
 		__i915_gem_object_pages_fini(obj);
-		i915_ttm_free_cached_io_st(obj);
+		i915_ttm_free_cached_io_rsgt(obj);
 	}
 }
 
@@ -563,40 +576,35 @@  i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
 					  ttm_mem_type - I915_PL_LMEM0);
 }
 
-static struct sg_table *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
 {
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
 	struct sg_table *st;
 	int ret;
 
-	if (i915_tt->cached_st)
-		return i915_tt->cached_st;
-
-	st = kzalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
-		return ERR_PTR(-ENOMEM);
+	if (i915_tt->cached_rsgt.table.sgl)
+		return i915_refct_sgt_get(&i915_tt->cached_rsgt);
 
+	st = &i915_tt->cached_rsgt.table;
 	ret = sg_alloc_table_from_pages_segment(st,
 			ttm->pages, ttm->num_pages,
 			0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
 			i915_sg_segment_size(), GFP_KERNEL);
 	if (ret) {
-		kfree(st);
+		st->sgl = NULL;
 		return ERR_PTR(ret);
 	}
 
 	ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
 	if (ret) {
 		sg_free_table(st);
-		kfree(st);
 		return ERR_PTR(ret);
 	}
 
-	i915_tt->cached_st = st;
-	return st;
+	return i915_refct_sgt_get(&i915_tt->cached_rsgt);
 }
 
-static struct sg_table *
+static struct i915_refct_sgt *
 i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 			 struct ttm_resource *res)
 {
@@ -610,7 +618,21 @@  i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
 	 * the resulting st. Might make sense for GGTT.
 	 */
 	GEM_WARN_ON(!cpu_maps_iomem(res));
-	return intel_region_ttm_resource_to_st(obj->mm.region, res);
+	if (bo->resource == res) {
+		if (!obj->ttm.cached_io_rsgt) {
+			struct i915_refct_sgt *rsgt;
+
+			rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+								 res);
+			if (IS_ERR(rsgt))
+				return rsgt;
+
+			obj->ttm.cached_io_rsgt = rsgt;
+		}
+		return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
+	}
+
+	return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
 }
 
 static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
@@ -621,10 +643,7 @@  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 {
 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
 						     bdev);
-	struct ttm_resource_manager *src_man =
-		ttm_manager_type(bo->bdev, bo->resource->mem_type);
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-	struct sg_table *src_st;
 	struct i915_request *rq;
 	struct ttm_tt *src_ttm = bo->ttm;
 	enum i915_cache_level src_level, dst_level;
@@ -650,17 +669,22 @@  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 		}
 		intel_engine_pm_put(i915->gt.migrate.context->engine);
 	} else {
-		src_st = src_man->use_tt ? i915_ttm_tt_get_st(src_ttm) :
-			obj->ttm.cached_io_st;
+		struct i915_refct_sgt *src_rsgt =
+			i915_ttm_resource_get_st(obj, bo->resource);
+
+		if (IS_ERR(src_rsgt))
+			return PTR_ERR(src_rsgt);
 
 		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
 		intel_engine_pm_get(i915->gt.migrate.context->engine);
 		ret = intel_context_migrate_copy(i915->gt.migrate.context,
-						 NULL, src_st->sgl, src_level,
+						 NULL, src_rsgt->table.sgl,
+						 src_level,
 						 gpu_binds_iomem(bo->resource),
 						 dst_st->sgl, dst_level,
 						 gpu_binds_iomem(dst_mem),
 						 &rq);
+		i915_refct_sgt_put(src_rsgt);
 		if (!ret && rq) {
 			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 			i915_request_put(rq);
@@ -674,13 +698,14 @@  static int i915_ttm_accel_move(struct ttm_buffer_object *bo,
 static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 			    struct ttm_resource *dst_mem,
 			    struct ttm_tt *dst_ttm,
-			    struct sg_table *dst_st,
+			    struct i915_refct_sgt *dst_rsgt,
 			    bool allow_accel)
 {
 	int ret = -EINVAL;
 
 	if (allow_accel)
-		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm, dst_st);
+		ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+					  &dst_rsgt->table);
 	if (ret) {
 		struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 		struct intel_memory_region *dst_reg, *src_reg;
@@ -697,12 +722,13 @@  static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
 		dst_iter = !cpu_maps_iomem(dst_mem) ?
 			ttm_kmap_iter_tt_init(&_dst_iter.tt, dst_ttm) :
 			ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap,
-						 dst_st, dst_reg->region.start);
+						 &dst_rsgt->table,
+						 dst_reg->region.start);
 
 		src_iter = !cpu_maps_iomem(bo->resource) ?
 			ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) :
 			ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap,
-						 obj->ttm.cached_io_st,
+						 &obj->ttm.cached_io_rsgt->table,
 						 src_reg->region.start);
 
 		ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter);
@@ -718,7 +744,7 @@  static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource_manager *dst_man =
 		ttm_manager_type(bo->bdev, dst_mem->mem_type);
 	struct ttm_tt *ttm = bo->ttm;
-	struct sg_table *dst_st;
+	struct i915_refct_sgt *dst_rsgt;
 	bool clear;
 	int ret;
 
@@ -744,22 +770,24 @@  static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 			return ret;
 	}
 
-	dst_st = i915_ttm_resource_get_st(obj, dst_mem);
-	if (IS_ERR(dst_st))
-		return PTR_ERR(dst_st);
+	dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
+	if (IS_ERR(dst_rsgt))
+		return PTR_ERR(dst_rsgt);
 
 	clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
 	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC)))
-		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_st, true);
+		__i915_ttm_move(bo, clear, dst_mem, bo->ttm, dst_rsgt, true);
 
 	ttm_bo_move_sync_cleanup(bo, dst_mem);
 	i915_ttm_adjust_domains_after_move(obj);
-	i915_ttm_free_cached_io_st(obj);
+	i915_ttm_free_cached_io_rsgt(obj);
 
 	if (gpu_binds_iomem(dst_mem) || cpu_maps_iomem(dst_mem)) {
-		obj->ttm.cached_io_st = dst_st;
-		obj->ttm.get_io_page.sg_pos = dst_st->sgl;
+		obj->ttm.cached_io_rsgt = dst_rsgt;
+		obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
 		obj->ttm.get_io_page.sg_idx = 0;
+	} else {
+		i915_refct_sgt_put(dst_rsgt);
 	}
 
 	i915_ttm_adjust_lru(obj);
@@ -825,7 +853,6 @@  static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
 		.interruptible = true,
 		.no_wait_gpu = false,
 	};
-	struct sg_table *st;
 	int real_num_busy;
 	int ret;
 
@@ -862,12 +889,16 @@  static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
 	}
 
 	if (!i915_gem_object_has_pages(obj)) {
-		/* Object either has a page vector or is an iomem object */
-		st = bo->ttm ? i915_ttm_tt_get_st(bo->ttm) : obj->ttm.cached_io_st;
-		if (IS_ERR(st))
-			return PTR_ERR(st);
+		struct i915_refct_sgt *rsgt =
+			i915_ttm_resource_get_st(obj, bo->resource);
+
+		if (IS_ERR(rsgt))
+			return PTR_ERR(rsgt);
 
-		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
+		GEM_BUG_ON(obj->mm.rsgt);
+		obj->mm.rsgt = rsgt;
+		__i915_gem_object_set_pages(obj, &rsgt->table,
+					    i915_sg_dma_sizes(rsgt->table.sgl));
 	}
 
 	i915_ttm_adjust_lru(obj);
@@ -941,6 +972,13 @@  static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
 	 * If the object is not destroyed next, The TTM eviction logic
 	 * and shrinkers will move it out if needed.
 	 */
+
+	if (obj->mm.rsgt) {
+		i915_refct_sgt_put(obj->mm.rsgt);
+		obj->mm.rsgt = NULL;
+	}
+
+	i915_ttm_adjust_lru(obj);
 }
 
 static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
@@ -1278,7 +1316,7 @@  int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 	struct ttm_operation_ctx ctx = {
 		.interruptible = intr,
 	};
-	struct sg_table *dst_st;
+	struct i915_refct_sgt *dst_rsgt;
 	int ret;
 
 	assert_object_held(dst);
@@ -1293,11 +1331,11 @@  int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 	if (ret)
 		return ret;
 
-	dst_st = gpu_binds_iomem(dst_bo->resource) ?
-		dst->ttm.cached_io_st : i915_ttm_tt_get_st(dst_bo->ttm);
-
+	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
 	__i915_ttm_move(src_bo, false, dst_bo->resource, dst_bo->ttm,
-			dst_st, allow_accel);
+			dst_rsgt, allow_accel);
+
+	i915_refct_sgt_put(dst_rsgt);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
index 4a6712dca838..8a510ee5d1ad 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.c
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -41,8 +41,32 @@  bool i915_sg_trim(struct sg_table *orig_st)
 	return true;
 }
 
+static void i915_refct_sgt_release(struct kref *ref)
+{
+	struct i915_refct_sgt *rsgt =
+		container_of(ref, typeof(*rsgt), kref);
+
+	sg_free_table(&rsgt->table);
+	kfree(rsgt);
+}
+
+static const struct i915_refct_sgt_ops rsgt_ops = {
+	.release = i915_refct_sgt_release
+};
+
+/**
+ * i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * @size: The size of the underlying memory buffer.
+ */
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
+{
+	i915_refct_sgt_init_ops(rsgt, size, &rsgt_ops);
+}
+
 /**
- * i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node
+ * i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct
+ * drm_mm_node
  * @node: The drm_mm_node.
  * @region_start: An offset to add to the dma addresses of the sg list.
  *
@@ -50,25 +74,28 @@  bool i915_sg_trim(struct sg_table *orig_st)
  * taking a maximum segment length into account, splitting into segments
  * if necessary.
  *
- * Return: A pointer to a kmalloced struct sg_table on success, negative
+ * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
  * error code cast to an error pointer on failure.
  */
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
-				      u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
+					      u64 region_start)
 {
 	const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
 	u64 segment_pages = max_segment >> PAGE_SHIFT;
 	u64 block_size, offset, prev_end;
+	struct i915_refct_sgt *rsgt;
 	struct sg_table *st;
 	struct scatterlist *sg;
 
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
+	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+	if (!rsgt)
 		return ERR_PTR(-ENOMEM);
 
+	i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
+	st = &rsgt->table;
 	if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
 			   GFP_KERNEL)) {
-		kfree(st);
+		i915_refct_sgt_put(rsgt);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -104,11 +131,11 @@  struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
 	sg_mark_end(sg);
 	i915_sg_trim(st);
 
-	return st;
+	return rsgt;
 }
 
 /**
- * i915_sg_from_buddy_resource - Create an sg_table from a struct
+ * i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct
  * i915_buddy_block list
  * @res: The struct i915_ttm_buddy_resource.
  * @region_start: An offset to add to the dma addresses of the sg list.
@@ -117,11 +144,11 @@  struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
  * taking a maximum segment length into account, splitting into segments
  * if necessary.
  *
- * Return: A pointer to a kmalloced struct sg_table on success, negative
 * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
  * error code cast to an error pointer on failure.
  */
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
-					     u64 region_start)
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
+						     u64 region_start)
 {
 	struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
 	const u64 size = res->num_pages << PAGE_SHIFT;
@@ -129,18 +156,21 @@  struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
 	struct i915_buddy_mm *mm = bman_res->mm;
 	struct list_head *blocks = &bman_res->blocks;
 	struct i915_buddy_block *block;
+	struct i915_refct_sgt *rsgt;
 	struct scatterlist *sg;
 	struct sg_table *st;
 	resource_size_t prev_end;
 
 	GEM_BUG_ON(list_empty(blocks));
 
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st)
+	rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
+	if (!rsgt)
 		return ERR_PTR(-ENOMEM);
 
+	i915_refct_sgt_init(rsgt, size);
+	st = &rsgt->table;
 	if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
-		kfree(st);
+		i915_refct_sgt_put(rsgt);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -181,7 +211,7 @@  struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
 	sg_mark_end(sg);
 	i915_sg_trim(st);
 
-	return st;
+	return rsgt;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index b8bd5925b03f..321fd4a9f777 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -144,10 +144,78 @@  static inline unsigned int i915_sg_segment_size(void)
 
 bool i915_sg_trim(struct sg_table *orig_st);
 
-struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
-				      u64 region_start);
+/**
+ * struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt
+ */
+struct i915_refct_sgt_ops {
+	/**
+	 * release() - Free the memory of the struct i915_refct_sgt
+	 * @ref: struct kref that is embedded in the struct i915_refct_sgt
+	 */
+	void (*release)(struct kref *ref);
+};
+
+/**
+ * struct i915_refct_sgt - A refcounted scatter-gather table
+ * @kref: struct kref for refcounting
+ * @table: struct sg_table holding the scatter-gather table itself. Note that
+ * @table->sgl = NULL can be used to determine whether a scatter-gather table
+ * is present or not.
+ * @size: The size in bytes of the underlying memory buffer
+ * @ops: The operations structure.
+ */
+struct i915_refct_sgt {
+	struct kref kref;
+	struct sg_table table;
+	size_t size;
+	const struct i915_refct_sgt_ops *ops;
+};
+
+/**
+ * i915_refct_sgt_put - Put a refcounted sg-table
+ * @rsgt: The struct i915_refct_sgt to put.
+ */
+static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
+{
+	if (rsgt)
+		kref_put(&rsgt->kref, rsgt->ops->release);
+}
+
+/**
+ * i915_refct_sgt_get - Get a refcounted sg-table
+ * @rsgt: The struct i915_refct_sgt to get.
+ */
+static inline struct i915_refct_sgt *
+i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
+{
+	kref_get(&rsgt->kref);
+	return rsgt;
+}
+
+/**
+ * i915_refct_sgt_init_ops - Initialize a refcounted sg-list with a custom
+ * operations structure
+ * @rsgt: The struct i915_refct_sgt to initialize.
+ * @size: Size in bytes of the underlying memory buffer.
+ * @ops: A customized operations structure in case the refcounted sg-list
+ * is embedded into another structure.
+ */
+static inline void i915_refct_sgt_init_ops(struct i915_refct_sgt *rsgt,
+					   size_t size,
+					   const struct i915_refct_sgt_ops *ops)
+{
+	kref_init(&rsgt->kref);
+	rsgt->table.sgl = NULL;
+	rsgt->size = size;
+	rsgt->ops = ops;
+}
+
+void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
+
+struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
+					      u64 region_start);
 
-struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
-					     u64 region_start);
+struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
+						     u64 region_start);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c
index 98c7339bf8ba..2e901a27e259 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.c
+++ b/drivers/gpu/drm/i915/intel_region_ttm.c
@@ -115,8 +115,8 @@  void intel_region_ttm_fini(struct intel_memory_region *mem)
 }
 
 /**
- * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource
- * to an sg_table.
+ * intel_region_ttm_resource_to_rsgt -
+ * Convert an opaque TTM resource manager resource to a refcounted sg_table.
  * @mem: The memory region.
  * @res: The resource manager resource obtained from the TTM resource manager.
  *
@@ -126,17 +126,18 @@  void intel_region_ttm_fini(struct intel_memory_region *mem)
  *
  * Return: A malloced sg_table on success, an error pointer on failure.
  */
-struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
-						 struct ttm_resource *res)
+struct i915_refct_sgt *
+intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
+				  struct ttm_resource *res)
 {
 	if (mem->is_range_manager) {
 		struct ttm_range_mgr_node *range_node =
 			to_ttm_range_mgr_node(res);
 
-		return i915_sg_from_mm_node(&range_node->mm_nodes[0],
-					    mem->region.start);
+		return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
+					      mem->region.start);
 	} else {
-		return i915_sg_from_buddy_resource(res, mem->region.start);
+		return i915_rsgt_from_buddy_resource(res, mem->region.start);
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h
index 6f44075920f2..7bbe2b46b504 100644
--- a/drivers/gpu/drm/i915/intel_region_ttm.h
+++ b/drivers/gpu/drm/i915/intel_region_ttm.h
@@ -22,8 +22,9 @@  int intel_region_ttm_init(struct intel_memory_region *mem);
 
 void intel_region_ttm_fini(struct intel_memory_region *mem);
 
-struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
-						 struct ttm_resource *res);
+struct i915_refct_sgt *
+intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
+				  struct ttm_resource *res);
 
 void intel_region_ttm_resource_free(struct intel_memory_region *mem,
 				    struct ttm_resource *res);
diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c
index 75793008c4ef..7ec5037eaa58 100644
--- a/drivers/gpu/drm/i915/selftests/mock_region.c
+++ b/drivers/gpu/drm/i915/selftests/mock_region.c
@@ -15,9 +15,9 @@ 
 static void mock_region_put_pages(struct drm_i915_gem_object *obj,
 				  struct sg_table *pages)
 {
+	i915_refct_sgt_put(obj->mm.rsgt);
+	obj->mm.rsgt = NULL;
 	intel_region_ttm_resource_free(obj->mm.region, obj->mm.res);
-	sg_free_table(pages);
-	kfree(pages);
 }
 
 static int mock_region_get_pages(struct drm_i915_gem_object *obj)
@@ -36,12 +36,14 @@  static int mock_region_get_pages(struct drm_i915_gem_object *obj)
 	if (IS_ERR(obj->mm.res))
 		return PTR_ERR(obj->mm.res);
 
-	pages = intel_region_ttm_resource_to_st(obj->mm.region, obj->mm.res);
-	if (IS_ERR(pages)) {
-		err = PTR_ERR(pages);
+	obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+							 obj->mm.res);
+	if (IS_ERR(obj->mm.rsgt)) {
+		err = PTR_ERR(obj->mm.rsgt);
 		goto err_free_resource;
 	}
 
+	pages = &obj->mm.rsgt->table;
 	__i915_gem_object_set_pages(obj, pages, i915_sg_dma_sizes(pages->sgl));
 
 	return 0;