
[1/8] drm/i915/gem: Break out some shmem backend utils

Message ID 20210913180605.2778493-1-matthew.auld@intel.com (mailing list archive)
State New, archived
Series [1/8] drm/i915/gem: Break out some shmem backend utils

Commit Message

Matthew Auld Sept. 13, 2021, 6:05 p.m. UTC
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Break out some shmem backend utils for future reuse by the TTM backend:
shmem_alloc_st(), shmem_free_st() and __shmem_writeback(), which we can
use to provide a shmem-backed TTM page pool for cached-only TTM
buffer objects.

The main functional change here is that we now compute the page sizes
using the DMA segments rather than the physical page address segments.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 182 +++++++++++++---------
 1 file changed, 107 insertions(+), 75 deletions(-)
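
For context on the page-size change mentioned in the commit message: the
allocation loop no longer accumulates sg_page_sizes from CPU-contiguous
segments; shmem_get_pages() instead derives the page-size mask from the
DMA-mapped segments via i915_sg_dma_sizes(st->sgl). As a rough sketch of
what that helper does (paraphrased; see i915_scatterlist.h in the tree for
the authoritative definition), it ORs together the DMA length of each
mapped segment:

	/*
	 * Sketch only: approximate shape of i915_sg_dma_sizes(). The mask
	 * reflects segment sizes as produced by the DMA/IOMMU mapping,
	 * which is what matters for GTT huge-page selection, rather than
	 * the CPU-physical contiguity of the backing pages.
	 */
	static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg)
	{
		unsigned int page_sizes = 0;

		while (sg && sg_dma_len(sg)) {
			page_sizes |= sg_dma_len(sg);
			sg = sg_next(sg);
		}

		return page_sizes;
	}

Building the mask from the DMA segments means it matches what
i915_gem_gtt_prepare_pages() actually mapped, rather than the pre-mapping
CPU page layout.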

Comments

kernel test robot Sept. 14, 2021, 3:45 a.m. UTC | #1
Hi Matthew,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on drm-exynos/exynos-drm-next linus/master v5.15-rc1 next-20210913]
[cannot apply to drm-intel/for-linux-next tegra-drm/drm/tegra/for-next drm/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patches, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Matthew-Auld/drm-i915-gem-Break-out-some-shmem-backend-utils/20210914-021041
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-a015-20210913 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 261cbe98c38f8c1ee1a482fe76511110e790f58a)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/94ccd9fd87e302b0435e60b7fe7747c0d0599133
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Matthew-Auld/drm-i915-gem-Break-out-some-shmem-backend-utils/20210914-021041
        git checkout 94ccd9fd87e302b0435e60b7fe7747c0d0599133
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/i915/gem/i915_gem_shmem.c:273:20: warning: variable 'mapping' is uninitialized when used here [-Wuninitialized]
           shmem_free_st(st, mapping, false, false);
                             ^~~~~~~
   drivers/gpu/drm/i915/gem/i915_gem_shmem.c:201:31: note: initialize the variable 'mapping' to silence this warning
           struct address_space *mapping;
                                        ^
                                         = NULL
   1 warning generated.


vim +/mapping +273 drivers/gpu/drm/i915/gem/i915_gem_shmem.c

   195	
   196	static int shmem_get_pages(struct drm_i915_gem_object *obj)
   197	{
   198		struct drm_i915_private *i915 = to_i915(obj->base.dev);
   199		struct intel_memory_region *mem = obj->mm.region;
   200		const unsigned long page_count = obj->base.size / PAGE_SIZE;
   201		struct address_space *mapping;
   202		struct sg_table *st;
   203		struct sgt_iter sgt_iter;
   204		struct page *page;
   205		unsigned int max_segment = i915_sg_segment_size();
   206		int ret;
   207	
   208		/*
   209		 * Assert that the object is not currently in any GPU domain. As it
   210		 * wasn't in the GTT, there shouldn't be any way it could have been in
   211		 * a GPU cache
   212		 */
   213		GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
   214		GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
   215	
   216	rebuild_st:
   217		st = shmem_alloc_st(i915, obj->base.size, mem,
   218				    obj->base.filp->f_mapping, max_segment);
   219		if (IS_ERR(st)) {
   220			ret = PTR_ERR(st);
   221			goto err_st;
   222		}
   223	
   224		ret = i915_gem_gtt_prepare_pages(obj, st);
   225		if (ret) {
   226			/*
   227			 * DMA remapping failed? One possible cause is that
   228			 * it could not reserve enough large entries, asking
   229			 * for PAGE_SIZE chunks instead may be helpful.
   230			 */
   231			if (max_segment > PAGE_SIZE) {
   232				for_each_sgt_page(page, sgt_iter, st)
   233					put_page(page);
   234				sg_free_table(st);
   235				kfree(st);
   236	
   237				max_segment = PAGE_SIZE;
   238				goto rebuild_st;
   239			} else {
   240				dev_warn(i915->drm.dev,
   241					 "Failed to DMA remap %lu pages\n",
   242					 page_count);
   243				goto err_pages;
   244			}
   245		}
   246	
   247		if (i915_gem_object_needs_bit17_swizzle(obj))
   248			i915_gem_object_do_bit_17_swizzle(obj, st);
   249	
   250		/*
   251		 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
   252		 * possible for userspace to bypass the GTT caching bits set by the
   253		 * kernel, as per the given object cache_level. This is troublesome
   254		 * since the heavy flush we apply when first gathering the pages is
   255		 * skipped if the kernel thinks the object is coherent with the GPU. As
   256		 * a result it might be possible to bypass the cache and read the
   257		 * contents of the page directly, which could be stale data. If it's
   258		 * just a case of userspace shooting themselves in the foot then so be
   259		 * it, but since i915 takes the stance of always zeroing memory before
   260		 * handing it to userspace, we need to prevent this.
   261		 *
   262		 * By setting cache_dirty here we make the clflush in set_pages
   263		 * unconditional on such platforms.
   264		 */
   265		if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
   266			obj->cache_dirty = true;
   267	
   268		__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
   269	
   270		return 0;
   271	
   272	err_pages:
 > 273		shmem_free_st(st, mapping, false, false);
   274		/*
   275		 * shmemfs first checks if there is enough memory to allocate the page
   276		 * and reports ENOSPC should there be insufficient, along with the usual
   277		 * ENOMEM for a genuine allocation failure.
   278		 *
   279		 * We use ENOSPC in our driver to mean that we have run out of aperture
   280		 * space and so want to translate the error from shmemfs back to our
   281		 * usual understanding of ENOMEM.
   282		 */
   283	err_st:
   284		if (ret == -ENOSPC)
   285			ret = -ENOMEM;
   286	
   287		return ret;
   288	}
   289	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
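
For reference, the uninitialized use comes directly from the refactor: the
assignment mapping = obj->base.filp->f_mapping; moved into shmem_alloc_st(),
so the local left behind in shmem_get_pages() is never written before the
err_pages path hands it to shmem_free_st(). A minimal sketch of one possible
fix (hypothetical; the revision that ultimately lands may differ) is to drop
the stale local and pass the object's mapping explicitly:

	err_pages:
		/*
		 * Use the object's mapping directly rather than the never-
		 * assigned local, so mapping_clear_unevictable() in
		 * shmem_free_st() acts on the same address_space that
		 * shmem_alloc_st() marked unevictable.
		 */
		shmem_free_st(st, obj->base.filp->f_mapping, false, false);

with the now-unused "struct address_space *mapping;" declaration in
shmem_get_pages() removed as well. Note that simply initializing the local
to NULL, as the robot's note suggests, would silence the warning but let the
error path pass NULL to mapping_clear_unevictable(), so passing the real
mapping looks like the safer direction.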

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 11f072193f3b..ab02309f0752 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,46 +25,61 @@  static void check_release_pagevec(struct pagevec *pvec)
 	cond_resched();
 }
 
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+			  bool dirty, bool backup)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct intel_memory_region *mem = obj->mm.region;
-	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	mapping_clear_unevictable(mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (dirty)
+			set_page_dirty(page);
+
+		if (backup)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+				       size_t size, struct intel_memory_region *mr,
+				       struct address_space *mapping,
+				       unsigned int max_segment)
+{
+	const unsigned long page_count = size / PAGE_SIZE;
 	unsigned long i;
-	struct address_space *mapping;
 	struct sg_table *st;
 	struct scatterlist *sg;
-	struct sgt_iter sgt_iter;
 	struct page *page;
 	unsigned long last_pfn = 0;	/* suppress gcc warning */
-	unsigned int max_segment = i915_sg_segment_size();
-	unsigned int sg_page_sizes;
 	gfp_t noreclaim;
 	int ret;
 
-	/*
-	 * Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
 	/*
 	 * If there's no chance of allocating enough pages for the whole
 	 * object, bail early.
 	 */
-	if (obj->base.size > resource_size(&mem->region))
-		return -ENOMEM;
+	if (size > resource_size(&mr->region))
+		return ERR_PTR(-ENOMEM);
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-rebuild_st:
 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
 		kfree(st);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	/*
@@ -73,14 +88,12 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	 *
 	 * Fail silently without starting the shrinker
 	 */
-	mapping = obj->base.filp->f_mapping;
 	mapping_set_unevictable(mapping);
 	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
 	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 
 	sg = st->sgl;
 	st->nents = 0;
-	sg_page_sizes = 0;
 	for (i = 0; i < page_count; i++) {
 		const unsigned int shrink[] = {
 			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +148,9 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 		if (!i ||
 		    sg->length >= max_segment ||
 		    page_to_pfn(page) != last_pfn + 1) {
-			if (i) {
-				sg_page_sizes |= sg->length;
+			if (i)
 				sg = sg_next(sg);
-			}
+
 			st->nents++;
 			sg_set_page(sg, page, PAGE_SIZE, 0);
 		} else {
@@ -149,14 +161,66 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 		/* Check that the i965g/gm workaround works. */
 		GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
 	}
-	if (sg) { /* loop terminated early; short sg table */
-		sg_page_sizes |= sg->length;
+	if (sg) /* loop terminated early; short sg table */
 		sg_mark_end(sg);
-	}
 
 	/* Trim unused sg entries to avoid wasting memory. */
 	i915_sg_trim(st);
 
+	return st;
+err_sg:
+	sg_mark_end(sg);
+	if (sg != st->sgl) {
+		shmem_free_st(st, mapping, false, false);
+	} else {
+		mapping_clear_unevictable(mapping);
+		sg_free_table(st);
+		kfree(st);
+	}
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ERR_PTR(ret);
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct intel_memory_region *mem = obj->mm.region;
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	struct address_space *mapping;
+	struct sg_table *st;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	unsigned int max_segment = i915_sg_segment_size();
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+rebuild_st:
+	st = shmem_alloc_st(i915, obj->base.size, mem,
+			    obj->base.filp->f_mapping, max_segment);
+	if (IS_ERR(st)) {
+		ret = PTR_ERR(st);
+		goto err_st;
+	}
+
 	ret = i915_gem_gtt_prepare_pages(obj, st);
 	if (ret) {
 		/*
@@ -168,6 +232,7 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 			for_each_sgt_page(page, sgt_iter, st)
 				put_page(page);
 			sg_free_table(st);
+			kfree(st);
 
 			max_segment = PAGE_SIZE;
 			goto rebuild_st;
@@ -200,28 +265,12 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
 		obj->cache_dirty = true;
 
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+	__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
 
 	return 0;
 
-err_sg:
-	sg_mark_end(sg);
 err_pages:
-	mapping_clear_unevictable(mapping);
-	if (sg != st->sgl) {
-		struct pagevec pvec;
-
-		pagevec_init(&pvec);
-		for_each_sgt_page(page, sgt_iter, st) {
-			if (!pagevec_add(&pvec, page))
-				check_release_pagevec(&pvec);
-		}
-		if (pagevec_count(&pvec))
-			check_release_pagevec(&pvec);
-	}
-	sg_free_table(st);
-	kfree(st);
-
+	shmem_free_st(st, mapping, false, false);
 	/*
 	 * shmemfs first checks if there is enough memory to allocate the page
 	 * and reports ENOSPC should there be insufficient, along with the usual
@@ -231,6 +280,7 @@  static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	 * space and so want to translate the error from shmemfs back to our
 	 * usual understanding of ENOMEM.
 	 */
+err_st:
 	if (ret == -ENOSPC)
 		ret = -ENOMEM;
 
@@ -251,10 +301,8 @@  shmem_truncate(struct drm_i915_gem_object *obj)
 	obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
-static void
-shmem_writeback(struct drm_i915_gem_object *obj)
+static void __shmem_writeback(size_t size, struct address_space *mapping)
 {
-	struct address_space *mapping;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 		.nr_to_write = SWAP_CLUSTER_MAX,
@@ -270,10 +318,9 @@  shmem_writeback(struct drm_i915_gem_object *obj)
 	 * instead of invoking writeback so they are aged and paged out
 	 * as normal.
 	 */
-	mapping = obj->base.filp->f_mapping;
 
 	/* Begin writeback on each dirty page */
-	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+	for (i = 0; i < size >> PAGE_SHIFT; i++) {
 		struct page *page;
 
 		page = find_lock_page(mapping, i);
@@ -296,6 +343,12 @@  shmem_writeback(struct drm_i915_gem_object *obj)
 	}
 }
 
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+	__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				struct sg_table *pages,
@@ -316,11 +369,6 @@  __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
 void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
 {
-	struct sgt_iter sgt_iter;
-	struct pagevec pvec;
-	struct page *page;
-
-	GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
 	__i915_gem_object_release_shmem(obj, pages, true);
 
 	i915_gem_gtt_finish_pages(obj, pages);
@@ -328,25 +376,9 @@  void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj, pages);
 
-	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, pages) {
-		if (obj->mm.dirty)
-			set_page_dirty(page);
-
-		if (obj->mm.madv == I915_MADV_WILLNEED)
-			mark_page_accessed(page);
-
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
+	shmem_free_st(pages, file_inode(obj->base.filp)->i_mapping,
+		      obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
 	obj->mm.dirty = false;
-
-	sg_free_table(pages);
-	kfree(pages);
 }
 
 static void