diff mbox

[05/22] drm/i915: introduce page_size members

Message ID 20170815181215.18310-6-matthew.auld@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Matthew Auld Aug. 15, 2017, 6:11 p.m. UTC
In preparation for supporting huge gtt pages for the ppgtt, we introduce
page size members for gem objects.  We fill in the page sizes by
scanning the sg table.

v2: pass the sg_mask to set_pages

v3: calculate the sg_mask inline with populating the sg_table where
possible, and pass to set_pages along with the pages.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h                  |  5 ++-
 drivers/gpu/drm/i915/i915_gem.c                  | 43 ++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_dmabuf.c           | 17 ++++++++--
 drivers/gpu/drm/i915/i915_gem_internal.c         |  5 ++-
 drivers/gpu/drm/i915/i915_gem_object.h           | 20 ++++++++++-
 drivers/gpu/drm/i915/i915_gem_stolen.c           | 13 ++++---
 drivers/gpu/drm/i915/i915_gem_userptr.c          | 26 ++++++++++----
 drivers/gpu/drm/i915/selftests/huge_gem_object.c |  4 ++-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c    |  3 +-
 9 files changed, 110 insertions(+), 26 deletions(-)

Comments

Chris Wilson Aug. 15, 2017, 6:31 p.m. UTC | #1
Quoting Matthew Auld (2017-08-15 19:11:58)
>  static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  {
>         struct sg_table *pages;
> +       unsigned int sg_mask = 0;
>  
>         GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
>  
> @@ -2485,11 +2514,11 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>                 return -EFAULT;
>         }
>  
> -       pages = obj->ops->get_pages(obj);
> +       pages = obj->ops->get_pages(obj, &sg_mask);
>         if (unlikely(IS_ERR(pages)))
>                 return PTR_ERR(pages);
>  
> -       __i915_gem_object_set_pages(obj, pages);
> +       __i915_gem_object_set_pages(obj, pages, sg_mask);

Just makes me keep wishing I did more than hint that we should move
__i915_gem_object_set_pages() down to the backend so that we don't need
this clumsy interface anymore. /whinge
-Chris
Chris Wilson Aug. 15, 2017, 6:37 p.m. UTC | #2
Quoting Matthew Auld (2017-08-15 19:11:58)
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index f152a38d7079..6861ebd1323b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -406,7 +406,8 @@ struct get_pages_work {
>  #endif
>  
>  static int
> -st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
> +st_set_pages(struct sg_table **st, struct page **pvec, int num_pages,
> +            unsigned int *sg_mask)
>  {
>         struct scatterlist *sg;
>         int ret, n;
> @@ -422,12 +423,17 @@ st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
>  
>                 for_each_sg((*st)->sgl, sg, num_pages, n)
>                         sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +               *sg_mask = PAGE_SIZE;
>         } else {
>                 ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
>                                                 0, num_pages << PAGE_SHIFT,
>                                                 GFP_KERNEL);
>                 if (ret)
>                         goto err;
> +
> +               for_each_sg((*st)->sgl, sg, num_pages, n)
> +                       *sg_mask |= sg->length;

Hmm. userptr may be shortlived, i.e. see more than their fair share of
get_pages. Need to keep an eye on this to see if this additional walk
shows up - the latency will most likely be on the execbuf submission path.

One way around that would be a flag to do the getpages at creation time,
though the reason it was deferred was that many userptrs were being made
that never were used for execbuf (backing SHM images).

But this reminds me we need to land Tvrtko's work so that we can handle
superpages here.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e6ffaab5dcde..0b219ce7cff1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3017,6 +3017,8 @@  intel_info(const struct drm_i915_private *dev_priv)
 #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
 #define USES_FULL_PPGTT(dev_priv)	(i915.enable_ppgtt >= 2)
 #define USES_FULL_48BIT_PPGTT(dev_priv)	(i915.enable_ppgtt == 3)
+#define HAS_PAGE_SIZE(dev_priv, page_size) \
+	((dev_priv)->info.page_size_mask & (page_size))
 
 #define HAS_OVERLAY(dev_priv)		 ((dev_priv)->info.has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
@@ -3426,7 +3428,8 @@  i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
 				unsigned long n);
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages);
+				 struct sg_table *pages,
+				 unsigned int sg_mask);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 03bff1a2ca13..1f69128a975c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -163,7 +163,8 @@  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 }
 
 static struct sg_table *
-i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj,
+			       unsigned int *sg_mask)
 {
 	struct address_space *mapping = obj->base.filp->f_mapping;
 	drm_dma_handle_t *phys;
@@ -223,6 +224,8 @@  i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	sg->offset = 0;
 	sg->length = obj->base.size;
 
+	*sg_mask = sg->length;
+
 	sg_dma_address(sg) = phys->busaddr;
 	sg_dma_len(sg) = obj->base.size;
 
@@ -2260,6 +2263,8 @@  void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	if (!IS_ERR(pages))
 		obj->ops->put_pages(obj, pages);
 
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
 unlock:
 	mutex_unlock(&obj->mm.lock);
 }
@@ -2291,7 +2296,8 @@  static bool i915_sg_trim(struct sg_table *orig_st)
 }
 
 static struct sg_table *
-i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
+i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
+			      unsigned int *sg_mask)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	const unsigned long page_count = obj->base.size / PAGE_SIZE;
@@ -2338,6 +2344,7 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 
 	sg = st->sgl;
 	st->nents = 0;
+	*sg_mask = 0;
 	for (i = 0; i < page_count; i++) {
 		const unsigned int shrink[] = {
 			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
@@ -2390,8 +2397,10 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		if (!i ||
 		    sg->length >= max_segment ||
 		    page_to_pfn(page) != last_pfn + 1) {
-			if (i)
+			if (i) {
+				*sg_mask |= sg->length;
 				sg = sg_next(sg);
+			}
 			st->nents++;
 			sg_set_page(sg, page, PAGE_SIZE, 0);
 		} else {
@@ -2402,8 +2411,10 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 		/* Check that the i965g/gm workaround works. */
 		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
 	}
-	if (sg) /* loop terminated early; short sg table */
+	if (sg) { /* loop terminated early; short sg table */
+		*sg_mask |= sg->length;
 		sg_mark_end(sg);
+	}
 
 	/* Trim unused sg entries to avoid wasting memory. */
 	i915_sg_trim(st);
@@ -2457,8 +2468,13 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 }
 
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages)
+				 struct sg_table *pages,
+				 unsigned int sg_mask)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
+	unsigned int bit;
+
 	lockdep_assert_held(&obj->mm.lock);
 
 	obj->mm.get_page.sg_pos = pages->sgl;
@@ -2472,11 +2488,24 @@  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 		__i915_gem_object_pin_pages(obj);
 		obj->mm.quirked = true;
 	}
+
+	GEM_BUG_ON(!sg_mask);
+
+	obj->mm.page_sizes.phys = sg_mask;
+
+	obj->mm.page_sizes.sg = 0;
+	for_each_set_bit(bit, &supported_page_sizes, BITS_PER_LONG) {
+		if (obj->mm.page_sizes.phys & ~0u << bit)
+			obj->mm.page_sizes.sg |= BIT(bit);
+	}
+
+	GEM_BUG_ON(!HAS_PAGE_SIZE(i915, obj->mm.page_sizes.sg));
 }
 
 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	struct sg_table *pages;
+	unsigned int sg_mask = 0;
 
 	GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
 
@@ -2485,11 +2514,11 @@  static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 		return -EFAULT;
 	}
 
-	pages = obj->ops->get_pages(obj);
+	pages = obj->ops->get_pages(obj, &sg_mask);
 	if (unlikely(IS_ERR(pages)))
 		return PTR_ERR(pages);
 
-	__i915_gem_object_set_pages(obj, pages);
+	__i915_gem_object_set_pages(obj, pages, sg_mask);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 6176e589cf09..2b3b16d88d4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -257,10 +257,21 @@  struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 }
 
 static struct sg_table *
-i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj,
+				 unsigned int *sg_mask)
 {
-	return dma_buf_map_attachment(obj->base.import_attach,
-				      DMA_BIDIRECTIONAL);
+	struct sg_table *pages;
+	struct scatterlist *sg;
+	int n;
+
+	pages = dma_buf_map_attachment(obj->base.import_attach,
+				       DMA_BIDIRECTIONAL);
+	if (!IS_ERR(pages)) {
+		for_each_sg(pages->sgl, sg, pages->nents, n)
+			*sg_mask |= sg->length;
+	}
+
+	return pages;
 }
 
 static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index c1f64ddaf8aa..86669ebe0da1 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -45,7 +45,8 @@  static void internal_free_pages(struct sg_table *st)
 }
 
 static struct sg_table *
-i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj,
+				   unsigned int *sg_mask)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *st;
@@ -76,6 +77,7 @@  i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	}
 
 create_st:
+	*sg_mask = 0;
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
 		return ERR_PTR(-ENOMEM);
@@ -105,6 +107,7 @@  i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 		} while (1);
 
 		sg_set_page(sg, page, PAGE_SIZE << order, 0);
+		*sg_mask |= PAGE_SIZE << order;
 		st->nents++;
 
 		npages -= 1 << order;
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 3baa341432db..5526b29587e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -56,7 +56,8 @@  struct drm_i915_gem_object_ops {
 	 * being released or under memory pressure (where we attempt to
 	 * reap pages for the shrinker).
 	 */
-	struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
+	struct sg_table *(*get_pages)(struct drm_i915_gem_object *,
+				      unsigned int *sg_mask);
 	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
 
 	int (*pwrite)(struct drm_i915_gem_object *,
@@ -148,6 +149,23 @@  struct drm_i915_gem_object {
 		struct sg_table *pages;
 		void *mapping;
 
+		struct i915_page_sizes {
+			/**
+			 * The sg mask of the pages sg_table. i.e the mask of
+			 * of the lengths for each sg entry.
+			 */
+			unsigned int phys;
+
+			/**
+			 * The gtt page sizes we are allowed to use given the
+			 * sg mask and the supported page sizes. This will
+			 * express the smallest unit we can use for the whole
+			 * object, as well as the larger sizes we may be able
+			 * to use opportunistically.
+			 */
+			unsigned int sg;
+		} page_sizes;
+
 		struct i915_gem_object_page_iter {
 			struct scatterlist *sg_pos;
 			unsigned int sg_idx; /* in pages, but 32bit eek! */
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 507c9f0d8df1..c6b578409200 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -540,11 +540,16 @@  i915_pages_create_for_stolen(struct drm_device *dev,
 }
 
 static struct sg_table *
-i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj,
+				 unsigned int *sg_mask)
 {
-	return i915_pages_create_for_stolen(obj->base.dev,
-					    obj->stolen->start,
-					    obj->stolen->size);
+	struct sg_table *pages =
+		i915_pages_create_for_stolen(obj->base.dev,
+					     obj->stolen->start,
+					     obj->stolen->size);
+	*sg_mask = obj->stolen->size;
+
+	return pages;
 }
 
 static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index f152a38d7079..6861ebd1323b 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -406,7 +406,8 @@  struct get_pages_work {
 #endif
 
 static int
-st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages,
+	     unsigned int *sg_mask)
 {
 	struct scatterlist *sg;
 	int ret, n;
@@ -422,12 +423,17 @@  st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
 
 		for_each_sg((*st)->sgl, sg, num_pages, n)
 			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+
+		*sg_mask = PAGE_SIZE;
 	} else {
 		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
 						0, num_pages << PAGE_SHIFT,
 						GFP_KERNEL);
 		if (ret)
 			goto err;
+
+		for_each_sg((*st)->sgl, sg, num_pages, n)
+			*sg_mask |= sg->length;
 	}
 
 	return 0;
@@ -440,12 +446,13 @@  st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
 
 static struct sg_table *
 __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
-			     struct page **pvec, int num_pages)
+			     struct page **pvec, int num_pages,
+			     unsigned int *sg_mask)
 {
 	struct sg_table *pages;
 	int ret;
 
-	ret = st_set_pages(&pages, pvec, num_pages);
+	ret = st_set_pages(&pages, pvec, num_pages, sg_mask);
 	if (ret)
 		return ERR_PTR(ret);
 
@@ -540,9 +547,12 @@  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 		struct sg_table *pages = ERR_PTR(ret);
 
 		if (pinned == npages) {
-			pages = __i915_gem_userptr_set_pages(obj, pvec, npages);
+			unsigned int sg_mask = 0;
+
+			pages = __i915_gem_userptr_set_pages(obj, pvec, npages,
+							     &sg_mask);
 			if (!IS_ERR(pages)) {
-				__i915_gem_object_set_pages(obj, pages);
+				__i915_gem_object_set_pages(obj, pages, sg_mask);
 				pinned = 0;
 				pages = NULL;
 			}
@@ -604,7 +614,8 @@  __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 }
 
 static struct sg_table *
-i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj,
+			   unsigned int *sg_mask)
 {
 	const int num_pages = obj->base.size >> PAGE_SHIFT;
 	struct mm_struct *mm = obj->userptr.mm->mm;
@@ -661,7 +672,8 @@  i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 		pages = __i915_gem_userptr_get_pages_schedule(obj);
 		active = pages == ERR_PTR(-EAGAIN);
 	} else {
-		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
+		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages,
+						     sg_mask);
 		active = !IS_ERR(pages);
 	}
 	if (active)
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index c5c7e8efbdd3..42ceab0d0429 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -38,7 +38,7 @@  static void huge_free_pages(struct drm_i915_gem_object *obj,
 }
 
 static struct sg_table *
-huge_get_pages(struct drm_i915_gem_object *obj)
+huge_get_pages(struct drm_i915_gem_object *obj, unsigned int *sg_mask)
 {
 #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
 	const unsigned long nreal = obj->scratch / PAGE_SIZE;
@@ -81,6 +81,8 @@  huge_get_pages(struct drm_i915_gem_object *obj)
 	if (i915_gem_gtt_prepare_pages(obj, pages))
 		goto err;
 
+	*sg_mask = PAGE_SIZE;
+
 	return pages;
 
 err:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 6b132caffa18..0e1ded4239f9 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -40,7 +40,7 @@  static void fake_free_pages(struct drm_i915_gem_object *obj,
 }
 
 static struct sg_table *
-fake_get_pages(struct drm_i915_gem_object *obj)
+fake_get_pages(struct drm_i915_gem_object *obj, unsigned int *sg_mask)
 {
 #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
 #define PFN_BIAS 0x1000
@@ -66,6 +66,7 @@  fake_get_pages(struct drm_i915_gem_object *obj)
 		sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0);
 		sg_dma_address(sg) = page_to_phys(sg_page(sg));
 		sg_dma_len(sg) = len;
+		*sg_mask |= len;
 
 		rem -= len;
 	}