diff mbox

[02/17] drm/i915: introduce gtt page size

Message ID 20170516082948.28090-3-matthew.auld@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Matthew Auld May 16, 2017, 8:29 a.m. UTC
In preparation for supporting huge gtt pages for the ppgtt, we introduce
a gtt_page_size member for gem objects.  We fill in the gtt page size by
scanning the sg table to determine the max page size which satisfies the
alignment for each sg entry.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h        |  2 ++
 drivers/gpu/drm/i915/i915_gem.c        | 23 +++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
 3 files changed, 27 insertions(+)

Comments

Chris Wilson May 16, 2017, 8:41 a.m. UTC | #1
On Tue, May 16, 2017 at 09:29:33AM +0100, Matthew Auld wrote:
> In preparation for supporting huge gtt pages for the ppgtt, we introduce
> a gtt_page_size member for gem objects.  We fill in the gtt page size by
> scanning the sg table to determine the max page size which satisfies the
> alignment for each sg entry.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |  2 ++
>  drivers/gpu/drm/i915/i915_gem.c        | 23 +++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e18f11f77f35..a7a108d18a2d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2843,6 +2843,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
>  #define USES_FULL_PPGTT(dev_priv)	(i915.enable_ppgtt >= 2)
>  #define USES_FULL_48BIT_PPGTT(dev_priv)	(i915.enable_ppgtt == 3)
> +#define HAS_PAGE_SIZE(dev_priv, page_size) \
> +	((dev_priv)->info.page_size_mask & (page_size))
>  
>  #define HAS_OVERLAY(dev_priv)		 ((dev_priv)->info.has_overlay)
>  #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0c1cbe98c994..6a5e864d7710 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2294,6 +2294,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>  	if (!IS_ERR(pages))
>  		obj->ops->put_pages(obj, pages);
>  
> +	obj->gtt_page_size = 0;
> +
>  unlock:
>  	mutex_unlock(&obj->mm.lock);
>  }
> @@ -2473,6 +2475,13 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>  				 struct sg_table *pages)
>  {
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
> +	struct scatterlist *sg;
> +	unsigned int sg_mask = 0;
> +	unsigned int i;
> +	unsigned int bit;
> +
>  	lockdep_assert_held(&obj->mm.lock);
>  
>  	obj->mm.get_page.sg_pos = pages->sgl;
> @@ -2486,6 +2495,20 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>  		__i915_gem_object_pin_pages(obj);
>  		obj->mm.quirked = true;
>  	}
> +
> +	for_each_sg(pages->sgl, sg, pages->nents, i)
> +		sg_mask |= sg->length;
> +
> +	GEM_BUG_ON(!sg_mask);
> +
> +	for_each_set_bit(bit, &supported_page_sizes, BITS_PER_LONG) {
> +		if (!IS_ALIGNED(sg_mask, 1 << bit))
> +			break;
> +
> +		obj->gtt_page_size = 1 << bit;
> +	}

Not here. This is the synchronous part, and we really do not want to
loop again. However, we have just looped over and can compute this mask
inline in the asynchronous portion of get_pages.
-Chris
Chris Wilson May 16, 2017, 9:59 a.m. UTC | #2
On Tue, May 16, 2017 at 09:29:33AM +0100, Matthew Auld wrote:
> In preparation for supporting huge gtt pages for the ppgtt, we introduce
> a gtt_page_size member for gem objects.  We fill in the gtt page size by
> scanning the sg table to determine the max page size which satisfies the
> alignment for each sg entry.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        |  2 ++
>  drivers/gpu/drm/i915/i915_gem.c        | 23 +++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
>  3 files changed, 27 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e18f11f77f35..a7a108d18a2d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2843,6 +2843,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>  #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
>  #define USES_FULL_PPGTT(dev_priv)	(i915.enable_ppgtt >= 2)
>  #define USES_FULL_48BIT_PPGTT(dev_priv)	(i915.enable_ppgtt == 3)
> +#define HAS_PAGE_SIZE(dev_priv, page_size) \
> +	((dev_priv)->info.page_size_mask & (page_size))
>  
>  #define HAS_OVERLAY(dev_priv)		 ((dev_priv)->info.has_overlay)
>  #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0c1cbe98c994..6a5e864d7710 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2294,6 +2294,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>  	if (!IS_ERR(pages))
>  		obj->ops->put_pages(obj, pages);
>  
> +	obj->gtt_page_size = 0;
> +
>  unlock:
>  	mutex_unlock(&obj->mm.lock);
>  }
> @@ -2473,6 +2475,13 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>  				 struct sg_table *pages)
>  {
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
> +	struct scatterlist *sg;
> +	unsigned int sg_mask = 0;
> +	unsigned int i;
> +	unsigned int bit;
> +
>  	lockdep_assert_held(&obj->mm.lock);
>  
>  	obj->mm.get_page.sg_pos = pages->sgl;
> @@ -2486,6 +2495,20 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>  		__i915_gem_object_pin_pages(obj);
>  		obj->mm.quirked = true;
>  	}
> +
> +	for_each_sg(pages->sgl, sg, pages->nents, i)
> +		sg_mask |= sg->length;
> +
> +	GEM_BUG_ON(!sg_mask);
> +

This should just be obj->gtt_page_sizes = sg_mask & supported_sizes;

And it should be obj->mm.gtt_page_sizes.

Then latter steps can make decisions based on the most strict
requirements, or least strict etc.
-Chris
Matthew Auld May 23, 2017, 12:42 p.m. UTC | #3
On 16 May 2017 at 10:59, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, May 16, 2017 at 09:29:33AM +0100, Matthew Auld wrote:
>> In preparation for supporting huge gtt pages for the ppgtt, we introduce
>> a gtt_page_size member for gem objects.  We fill in the gtt page size by
>> scanning the sg table to determine the max page size which satisfies the
>> alignment for each sg entry.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniel Vetter <daniel@ffwll.ch>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h        |  2 ++
>>  drivers/gpu/drm/i915/i915_gem.c        | 23 +++++++++++++++++++++++
>>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
>>  3 files changed, 27 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index e18f11f77f35..a7a108d18a2d 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2843,6 +2843,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>  #define USES_PPGTT(dev_priv)         (i915.enable_ppgtt)
>>  #define USES_FULL_PPGTT(dev_priv)    (i915.enable_ppgtt >= 2)
>>  #define USES_FULL_48BIT_PPGTT(dev_priv)      (i915.enable_ppgtt == 3)
>> +#define HAS_PAGE_SIZE(dev_priv, page_size) \
>> +     ((dev_priv)->info.page_size_mask & (page_size))
>>
>>  #define HAS_OVERLAY(dev_priv)                 ((dev_priv)->info.has_overlay)
>>  #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 0c1cbe98c994..6a5e864d7710 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2294,6 +2294,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>>       if (!IS_ERR(pages))
>>               obj->ops->put_pages(obj, pages);
>>
>> +     obj->gtt_page_size = 0;
>> +
>>  unlock:
>>       mutex_unlock(&obj->mm.lock);
>>  }
>> @@ -2473,6 +2475,13 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>>  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>>                                struct sg_table *pages)
>>  {
>> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> +     unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
>> +     struct scatterlist *sg;
>> +     unsigned int sg_mask = 0;
>> +     unsigned int i;
>> +     unsigned int bit;
>> +
>>       lockdep_assert_held(&obj->mm.lock);
>>
>>       obj->mm.get_page.sg_pos = pages->sgl;
>> @@ -2486,6 +2495,20 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>>               __i915_gem_object_pin_pages(obj);
>>               obj->mm.quirked = true;
>>       }
>> +
>> +     for_each_sg(pages->sgl, sg, pages->nents, i)
>> +             sg_mask |= sg->length;
>> +
>> +     GEM_BUG_ON(!sg_mask);
>> +
>
> This should just be obj->gtt_page_sizes = sg_mask & supported_sizes;
But wouldn't this assume that sg->length is exactly a page size, I
would have imagined it would be possible for shmem to give us two or
more continuous super-pages, or am I missing something?

>
> And it should be obj->mm.gtt_page_sizes.
>
> Then latter steps can make decisions based on the most strict
> requirements, or least strict etc.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson May 23, 2017, 12:54 p.m. UTC | #4
On Tue, May 23, 2017 at 01:42:56PM +0100, Matthew Auld wrote:
> On 16 May 2017 at 10:59, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Tue, May 16, 2017 at 09:29:33AM +0100, Matthew Auld wrote:
> >> In preparation for supporting huge gtt pages for the ppgtt, we introduce
> >> a gtt_page_size member for gem objects.  We fill in the gtt page size by
> >> scanning the sg table to determine the max page size which satisfies the
> >> alignment for each sg entry.
> >>
> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Daniel Vetter <daniel@ffwll.ch>
> >> ---
> >>  drivers/gpu/drm/i915/i915_drv.h        |  2 ++
> >>  drivers/gpu/drm/i915/i915_gem.c        | 23 +++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
> >>  3 files changed, 27 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index e18f11f77f35..a7a108d18a2d 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -2843,6 +2843,8 @@ intel_info(const struct drm_i915_private *dev_priv)
> >>  #define USES_PPGTT(dev_priv)         (i915.enable_ppgtt)
> >>  #define USES_FULL_PPGTT(dev_priv)    (i915.enable_ppgtt >= 2)
> >>  #define USES_FULL_48BIT_PPGTT(dev_priv)      (i915.enable_ppgtt == 3)
> >> +#define HAS_PAGE_SIZE(dev_priv, page_size) \
> >> +     ((dev_priv)->info.page_size_mask & (page_size))
> >>
> >>  #define HAS_OVERLAY(dev_priv)                 ((dev_priv)->info.has_overlay)
> >>  #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >> index 0c1cbe98c994..6a5e864d7710 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem.c
> >> +++ b/drivers/gpu/drm/i915/i915_gem.c
> >> @@ -2294,6 +2294,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> >>       if (!IS_ERR(pages))
> >>               obj->ops->put_pages(obj, pages);
> >>
> >> +     obj->gtt_page_size = 0;
> >> +
> >>  unlock:
> >>       mutex_unlock(&obj->mm.lock);
> >>  }
> >> @@ -2473,6 +2475,13 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
> >>                                struct sg_table *pages)
> >>  {
> >> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >> +     unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
> >> +     struct scatterlist *sg;
> >> +     unsigned int sg_mask = 0;
> >> +     unsigned int i;
> >> +     unsigned int bit;
> >> +
> >>       lockdep_assert_held(&obj->mm.lock);
> >>
> >>       obj->mm.get_page.sg_pos = pages->sgl;
> >> @@ -2486,6 +2495,20 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
> >>               __i915_gem_object_pin_pages(obj);
> >>               obj->mm.quirked = true;
> >>       }
> >> +
> >> +     for_each_sg(pages->sgl, sg, pages->nents, i)
> >> +             sg_mask |= sg->length;
> >> +
> >> +     GEM_BUG_ON(!sg_mask);
> >> +
> >
> > This should just be obj->gtt_page_sizes = sg_mask & supported_sizes;
> But wouldn't this assume that sg->length is exactly a page size, I
> would have imagined it would be possible for shmem to give us two or
> more continuous super-pages, or am I missing something?

I'd actually report obj->mm.phys_page_sizes = sg_mask; and cook a value for
obj->mm.gtt_pages_sizes:

	obj->mm.gtt_page_sizes = 0;
	for_each_set_bit(bit, &i915->info.supported_gtt_pages_size) { // add salt
		if (obj->mm.phys_page_sizes & ~0u << bit)
			obj->mm.gtt_page_sizes |= BIT(bit);
	}

Certainly for the internal objects we will have a variety of different
orders.
-Chris
Matthew Auld May 23, 2017, 1:57 p.m. UTC | #5
On 23 May 2017 at 13:54, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, May 23, 2017 at 01:42:56PM +0100, Matthew Auld wrote:
>> On 16 May 2017 at 10:59, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Tue, May 16, 2017 at 09:29:33AM +0100, Matthew Auld wrote:
>> >> In preparation for supporting huge gtt pages for the ppgtt, we introduce
>> >> a gtt_page_size member for gem objects.  We fill in the gtt page size by
>> >> scanning the sg table to determine the max page size which satisfies the
>> >> alignment for each sg entry.
>> >>
>> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> >> Cc: Daniel Vetter <daniel@ffwll.ch>
>> >> ---
>> >>  drivers/gpu/drm/i915/i915_drv.h        |  2 ++
>> >>  drivers/gpu/drm/i915/i915_gem.c        | 23 +++++++++++++++++++++++
>> >>  drivers/gpu/drm/i915/i915_gem_object.h |  2 ++
>> >>  3 files changed, 27 insertions(+)
>> >>
>> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> >> index e18f11f77f35..a7a108d18a2d 100644
>> >> --- a/drivers/gpu/drm/i915/i915_drv.h
>> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> >> @@ -2843,6 +2843,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>> >>  #define USES_PPGTT(dev_priv)         (i915.enable_ppgtt)
>> >>  #define USES_FULL_PPGTT(dev_priv)    (i915.enable_ppgtt >= 2)
>> >>  #define USES_FULL_48BIT_PPGTT(dev_priv)      (i915.enable_ppgtt == 3)
>> >> +#define HAS_PAGE_SIZE(dev_priv, page_size) \
>> >> +     ((dev_priv)->info.page_size_mask & (page_size))
>> >>
>> >>  #define HAS_OVERLAY(dev_priv)                 ((dev_priv)->info.has_overlay)
>> >>  #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
>> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> >> index 0c1cbe98c994..6a5e864d7710 100644
>> >> --- a/drivers/gpu/drm/i915/i915_gem.c
>> >> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> >> @@ -2294,6 +2294,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>> >>       if (!IS_ERR(pages))
>> >>               obj->ops->put_pages(obj, pages);
>> >>
>> >> +     obj->gtt_page_size = 0;
>> >> +
>> >>  unlock:
>> >>       mutex_unlock(&obj->mm.lock);
>> >>  }
>> >> @@ -2473,6 +2475,13 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>> >>  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>> >>                                struct sg_table *pages)
>> >>  {
>> >> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> >> +     unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
>> >> +     struct scatterlist *sg;
>> >> +     unsigned int sg_mask = 0;
>> >> +     unsigned int i;
>> >> +     unsigned int bit;
>> >> +
>> >>       lockdep_assert_held(&obj->mm.lock);
>> >>
>> >>       obj->mm.get_page.sg_pos = pages->sgl;
>> >> @@ -2486,6 +2495,20 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
>> >>               __i915_gem_object_pin_pages(obj);
>> >>               obj->mm.quirked = true;
>> >>       }
>> >> +
>> >> +     for_each_sg(pages->sgl, sg, pages->nents, i)
>> >> +             sg_mask |= sg->length;
>> >> +
>> >> +     GEM_BUG_ON(!sg_mask);
>> >> +
>> >
>> > This should just be obj->gtt_page_sizes = sg_mask & supported_sizes;
>> But wouldn't this assume that sg->length is exactly a page size, I
>> would have imagined it would be possible for shmem to give us two or
>> more continuous super-pages, or am I missing something?
>
> I'd actually report obj->mm.phys_page_sizes = sg_mask; and cook a value for
> obj->mm.gtt_pages_sizes:
>
>         obj->mm.gtt_page_sizes = 0;
>         for_each_set_bit(bit, &i915->info.supported_gtt_pages_size) { // add salt
>                 if (obj->mm.phys_page_sizes & ~0u << bit)
>                         obj->mm.gtt_page_sizes |= BIT(bit);
>         }
Nifty.

So in mixed-mode what would be the alignment strategy? Align to
largest, smallest, don't align at all etc. For example if we were
unlucky and got something like 4K->2M? The obj->mm.gtt_page_sizes
should always be representative of how we end up inserting it into the
gtt, right? Would it not be more apt to move the gtt_page_sizes
tracking to when we do the insert?

>
> Certainly for the internal objects we will have a variety of different
> orders.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson May 23, 2017, 2:30 p.m. UTC | #6
On Tue, May 23, 2017 at 02:57:16PM +0100, Matthew Auld wrote:
> On 23 May 2017 at 13:54, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Tue, May 23, 2017 at 01:42:56PM +0100, Matthew Auld wrote:
> >> On 16 May 2017 at 10:59, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >> > This should just be obj->gtt_page_sizes = sg_mask & supported_sizes;
> >> But wouldn't this assume that sg->length is exactly a page size, I
> >> would have imagined it would be possible for shmem to give us two or
> >> more continuous super-pages, or am I missing something?
> >
> > I'd actually report obj->mm.phys_page_sizes = sg_mask; and cook a value for
> > obj->mm.gtt_pages_sizes:
> >
> >         obj->mm.gtt_page_sizes = 0;
> >         for_each_set_bit(bit, &i915->info.supported_gtt_pages_size) { // add salt
> >                 if (obj->mm.phys_page_sizes & ~0u << bit)
> >                         obj->mm.gtt_page_sizes |= BIT(bit);
> >         }
> Nifty.
> 
> So in mixed-mode what would be the alignment strategy? Align to
> largest, smallest, don't align at all etc. For example if we were
> unlucky and got something like 4K->2M? The obj->mm.gtt_page_sizes
> should always be representative of how we end up inserting it into the
> gtt, right? Would it not be more apt to move the gtt_page_sizes
> tracking to when we do the insert?

My first thought was align to worst (and then hope for the best, i.e.
that we can make use of that alignment, I'm still thinking even if we
get a huge physical page, wasting that alignment in a 48b aperture still
isn't terrible -- caveats if we need to fit inside the low 4G!).
Reporting the actual GTT sizes used might be useful for debug, but the
focus should be on tracking the values that make the code simpler :)
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e18f11f77f35..a7a108d18a2d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2843,6 +2843,8 @@  intel_info(const struct drm_i915_private *dev_priv)
 #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
 #define USES_FULL_PPGTT(dev_priv)	(i915.enable_ppgtt >= 2)
 #define USES_FULL_48BIT_PPGTT(dev_priv)	(i915.enable_ppgtt == 3)
+#define HAS_PAGE_SIZE(dev_priv, page_size) \
+	((dev_priv)->info.page_size_mask & (page_size))
 
 #define HAS_OVERLAY(dev_priv)		 ((dev_priv)->info.has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0c1cbe98c994..6a5e864d7710 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2294,6 +2294,8 @@  void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	if (!IS_ERR(pages))
 		obj->ops->put_pages(obj, pages);
 
+	obj->gtt_page_size = 0;
+
 unlock:
 	mutex_unlock(&obj->mm.lock);
 }
@@ -2473,6 +2475,13 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported_page_sizes = INTEL_INFO(i915)->page_size_mask;
+	struct scatterlist *sg;
+	unsigned int sg_mask = 0;
+	unsigned int i;
+	unsigned int bit;
+
 	lockdep_assert_held(&obj->mm.lock);
 
 	obj->mm.get_page.sg_pos = pages->sgl;
@@ -2486,6 +2495,20 @@  void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 		__i915_gem_object_pin_pages(obj);
 		obj->mm.quirked = true;
 	}
+
+	for_each_sg(pages->sgl, sg, pages->nents, i)
+		sg_mask |= sg->length;
+
+	GEM_BUG_ON(!sg_mask);
+
+	for_each_set_bit(bit, &supported_page_sizes, BITS_PER_LONG) {
+		if (!IS_ALIGNED(sg_mask, 1 << bit))
+			break;
+
+		obj->gtt_page_size = 1 << bit;
+	}
+
+	GEM_BUG_ON(!HAS_PAGE_SIZE(i915, obj->gtt_page_size));
 }
 
 static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 174cf923c236..75beb6a79635 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -107,6 +107,8 @@  struct drm_i915_gem_object {
 	unsigned int cache_level:3;
 	unsigned int cache_dirty:1;
 
+	unsigned int gtt_page_size;
+
 	atomic_t frontbuffer_bits;
 	unsigned int frontbuffer_ggtt_origin; /* write once */
 	struct i915_gem_active frontbuffer_write;