diff mbox

[15/18] drm/i915: Add a new "remapped" gtt_view

Message ID 20180719182214.4323-16-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ville Syrjälä July 19, 2018, 6:22 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

To overcome display engine stride limits we'll want to remap the
pages in the GTT. To that end we need a new gtt_view type which
is just like the "rotated" type except not rotated.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 12 +++++
 drivers/gpu/drm/i915/i915_gem_gtt.c  | 91 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h  | 16 +++++++
 drivers/gpu/drm/i915/i915_vma.c      |  6 ++-
 drivers/gpu/drm/i915/i915_vma.h      |  5 +-
 drivers/gpu/drm/i915/intel_display.c | 11 +++++
 drivers/gpu/drm/i915/intel_drv.h     |  1 +
 7 files changed, 140 insertions(+), 2 deletions(-)

Comments

Chris Wilson July 19, 2018, 6:59 p.m. UTC | #1
Quoting Ville Syrjala (2018-07-19 19:22:11)
> +static struct scatterlist *
> +remap_pages(const dma_addr_t *in, unsigned int offset,
> +           unsigned int width, unsigned int height,
> +           unsigned int stride,
> +           struct sg_table *st, struct scatterlist *sg)
> +{
> +       unsigned int column, row;
> +
> +       for (row = 0; row < height; row++) {
> +               for (column = 0; column < width; column++) {
> +                       st->nents++;
> +                       /* We don't need the pages, but need to initialize
> +                        * the entries so the sg list can be happily traversed.
> +                        * The only thing we need are DMA addresses.
> +                        */
> +                       sg_set_page(sg, NULL, PAGE_SIZE, 0);
> +                       sg_dma_address(sg) = in[offset + column];
> +                       sg_dma_len(sg) = PAGE_SIZE;
> +                       sg = sg_next(sg);

Ok. But should be I915_GTT_PAGE_SIZE?

And yes, following that argument looks like rotation should be converted.

> +               }
> +               offset += stride;
> +       }
> +
> +       return sg;
> +}
> +
> +static noinline struct sg_table *
> +intel_remap_pages(struct intel_remapped_info *rem_info,
> +                 struct drm_i915_gem_object *obj)
> +{
> +       const unsigned long n_pages = obj->base.size / PAGE_SIZE;
> +       unsigned int size = intel_remapped_info_size(rem_info);
> +       struct sgt_iter sgt_iter;
> +       dma_addr_t dma_addr;
> +       unsigned long i;
> +       dma_addr_t *page_addr_list;
> +       struct sg_table *st;
> +       struct scatterlist *sg;
> +       int ret = -ENOMEM;
> +
> +       /* Allocate a temporary list of source pages for random access. */
> +       page_addr_list = kvmalloc_array(n_pages,
> +                                       sizeof(dma_addr_t),
> +                                       GFP_KERNEL);
> +       if (!page_addr_list)
> +               return ERR_PTR(ret);
> +
> +       /* Allocate target SG list. */
> +       st = kmalloc(sizeof(*st), GFP_KERNEL);
> +       if (!st)
> +               goto err_st_alloc;
> +
> +       ret = sg_alloc_table(st, size, GFP_KERNEL);
> +       if (ret)
> +               goto err_sg_alloc;
> +
> +       /* Populate source page list from the object. */
> +       i = 0;
> +       for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
> +               page_addr_list[i++] = dma_addr;

You could use the i915_gem_object_get_dma_address() for this. We
definitely will reuse the index (as the one object will likely be
remapped into each CRTC). (Avoids having a temporary vmalloc here.)

> +
> +       GEM_BUG_ON(i != n_pages);
> +       st->nents = 0;
> +       sg = st->sgl;
> +
> +       for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
> +               sg = remap_pages(page_addr_list, rem_info->plane[i].offset,
> +                                rem_info->plane[i].width, rem_info->plane[i].height,
> +                                rem_info->plane[i].stride, st, sg);
> +       }

> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 2a116a91420b..e15ac9f64c36 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -160,6 +160,19 @@ typedef u64 gen8_ppgtt_pml4e_t;
>  
>  struct sg_table;
>  
> +struct intel_remapped_info {
> +       struct intel_remapped_plane_info {
> +               /* tiles */
> +               unsigned int width, height, stride, offset;
> +       } plane[2];
> +       unsigned int unused;

Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
it actually is better if it doesn't exist if it isn't used, then it
should be zero.. Hmm, not sure if that's defined at all, might have to
say memset and don't rely on {} zeroing?

Should work fine as a memcmp key for the rbtree.

> +} __packed;
> +
> +static inline void assert_intel_remapped_info_is_packed(void)
> +{
> +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> +}
> +
>  struct intel_rotation_info {
>         struct intel_rotation_plane_info {
>                 /* tiles */
> @@ -186,6 +199,7 @@ enum i915_ggtt_view_type {
>         I915_GGTT_VIEW_NORMAL = 0,
>         I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
>         I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
> +       I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
>  };
>  
>  static inline void assert_i915_ggtt_view_type_is_unique(void)
> @@ -197,6 +211,7 @@ static inline void assert_i915_ggtt_view_type_is_unique(void)
>         case I915_GGTT_VIEW_NORMAL:
>         case I915_GGTT_VIEW_PARTIAL:
>         case I915_GGTT_VIEW_ROTATED:
> +       case I915_GGTT_VIEW_REMAPPED:
>                 /* gcc complains if these are identical cases */
>                 break;
>         }
> @@ -208,6 +223,7 @@ struct i915_ggtt_view {
>                 /* Members need to contain no holes/padding */
>                 struct intel_partial_info partial;
>                 struct intel_rotation_info rotated;
> +               struct intel_remapped_info remapped;
>         };
>  };
>  
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 11d834f94220..eb54755ec25b 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -164,6 +164,9 @@ vma_create(struct drm_i915_gem_object *obj,
>                 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
>                         vma->size = intel_rotation_info_size(&view->rotated);
>                         vma->size <<= PAGE_SHIFT;
> +               } else if (view->type == I915_GGTT_VIEW_REMAPPED) {
> +                       vma->size = intel_remapped_info_size(&view->remapped);
> +                       vma->size <<= PAGE_SHIFT;
>                 }
>         }
>  
> @@ -462,7 +465,8 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
>          * Explicitly disable for rotated VMA since the display does not
>          * need the fence and the VMA is not accessible to other users.
>          */
> -       if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
> +       if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED ||
> +           vma->ggtt_view.type == I915_GGTT_VIEW_REMAPPED)
>                 return;

Not fenceable? I suppose, you only want the fence tracking from GGTT
mmaps. Do we need to make sure FBC finally works on unfenced vma? I
guess we do.

>         fenceable = (vma->node.size >= vma->fence_size &&
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index f06d66377107..19bb3115226c 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -257,8 +257,11 @@ i915_vma_compare(struct i915_vma *vma,
>          */
>         BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
>         BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
> +       BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
>         BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
> -                    offsetof(typeof(*view), partial));
> +                    offsetof(typeof(*view), partial) ||
> +                    offsetof(typeof(*view), rotated) !=
> +                    offsetof(typeof(*view), remapped));

Two separate BUILD_BUG_ON() I think, one pair of offsetof() will be
harder to interpret should if fail; two pairs never!
-Chris
Ville Syrjälä July 19, 2018, 7:33 p.m. UTC | #2
On Thu, Jul 19, 2018 at 07:59:33PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjala (2018-07-19 19:22:11)
> > +static struct scatterlist *
> > +remap_pages(const dma_addr_t *in, unsigned int offset,
> > +           unsigned int width, unsigned int height,
> > +           unsigned int stride,
> > +           struct sg_table *st, struct scatterlist *sg)
> > +{
> > +       unsigned int column, row;
> > +
> > +       for (row = 0; row < height; row++) {
> > +               for (column = 0; column < width; column++) {
> > +                       st->nents++;
> > +                       /* We don't need the pages, but need to initialize
> > +                        * the entries so the sg list can be happily traversed.
> > +                        * The only thing we need are DMA addresses.
> > +                        */
> > +                       sg_set_page(sg, NULL, PAGE_SIZE, 0);
> > +                       sg_dma_address(sg) = in[offset + column];
> > +                       sg_dma_len(sg) = PAGE_SIZE;
> > +                       sg = sg_next(sg);
> 
> Ok. But should be I915_GTT_PAGE_SIZE?

I suppose. And now I wonder what would happen on gen2 with its
2KiB gtt pages. Probably nothing good.

> 
> And yes, following that argument looks like rotation should be converted.
> 
> > +               }
> > +               offset += stride;
> > +       }
> > +
> > +       return sg;
> > +}
> > +
> > +static noinline struct sg_table *
> > +intel_remap_pages(struct intel_remapped_info *rem_info,
> > +                 struct drm_i915_gem_object *obj)
> > +{
> > +       const unsigned long n_pages = obj->base.size / PAGE_SIZE;
> > +       unsigned int size = intel_remapped_info_size(rem_info);
> > +       struct sgt_iter sgt_iter;
> > +       dma_addr_t dma_addr;
> > +       unsigned long i;
> > +       dma_addr_t *page_addr_list;
> > +       struct sg_table *st;
> > +       struct scatterlist *sg;
> > +       int ret = -ENOMEM;
> > +
> > +       /* Allocate a temporary list of source pages for random access. */
> > +       page_addr_list = kvmalloc_array(n_pages,
> > +                                       sizeof(dma_addr_t),
> > +                                       GFP_KERNEL);
> > +       if (!page_addr_list)
> > +               return ERR_PTR(ret);
> > +
> > +       /* Allocate target SG list. */
> > +       st = kmalloc(sizeof(*st), GFP_KERNEL);
> > +       if (!st)
> > +               goto err_st_alloc;
> > +
> > +       ret = sg_alloc_table(st, size, GFP_KERNEL);
> > +       if (ret)
> > +               goto err_sg_alloc;
> > +
> > +       /* Populate source page list from the object. */
> > +       i = 0;
> > +       for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
> > +               page_addr_list[i++] = dma_addr;
> 
> You could use the i915_gem_object_get_dma_address() for this. We
> definitely will reuse the index (as the one object will likely be
> remapped into each CRTC). (Avoids having a temporary vmalloc here.)
> 
> > +
> > +       GEM_BUG_ON(i != n_pages);
> > +       st->nents = 0;
> > +       sg = st->sgl;
> > +
> > +       for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
> > +               sg = remap_pages(page_addr_list, rem_info->plane[i].offset,
> > +                                rem_info->plane[i].width, rem_info->plane[i].height,
> > +                                rem_info->plane[i].stride, st, sg);
> > +       }
> 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index 2a116a91420b..e15ac9f64c36 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -160,6 +160,19 @@ typedef u64 gen8_ppgtt_pml4e_t;
> >  
> >  struct sg_table;
> >  
> > +struct intel_remapped_info {
> > +       struct intel_remapped_plane_info {
> > +               /* tiles */
> > +               unsigned int width, height, stride, offset;
> > +       } plane[2];
> > +       unsigned int unused;
> 
> Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
> it actually is better if it doesn't exist if it isn't used, then it
> should be zero.. Hmm, not sure if that's defined at all, might have to
> say memset and don't rely on {} zeroing?
> 
> Should work fine as a memcmp key for the rbtree.

This whole thing is a bit questionale the way I did it. When populating
the gtt_view I just poke at view->rotated and rely on matching layout
for view->remapped. To make it less magic maybe I should embed one
inside the other?

> 
> > +} __packed;
> > +
> > +static inline void assert_intel_remapped_info_is_packed(void)
> > +{
> > +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> > +}
> > +
> >  struct intel_rotation_info {
> >         struct intel_rotation_plane_info {
> >                 /* tiles */
> > @@ -186,6 +199,7 @@ enum i915_ggtt_view_type {
> >         I915_GGTT_VIEW_NORMAL = 0,
> >         I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
> >         I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
> > +       I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
> >  };
> >  
> >  static inline void assert_i915_ggtt_view_type_is_unique(void)
> > @@ -197,6 +211,7 @@ static inline void assert_i915_ggtt_view_type_is_unique(void)
> >         case I915_GGTT_VIEW_NORMAL:
> >         case I915_GGTT_VIEW_PARTIAL:
> >         case I915_GGTT_VIEW_ROTATED:
> > +       case I915_GGTT_VIEW_REMAPPED:
> >                 /* gcc complains if these are identical cases */
> >                 break;
> >         }
> > @@ -208,6 +223,7 @@ struct i915_ggtt_view {
> >                 /* Members need to contain no holes/padding */
> >                 struct intel_partial_info partial;
> >                 struct intel_rotation_info rotated;
> > +               struct intel_remapped_info remapped;
> >         };
> >  };
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index 11d834f94220..eb54755ec25b 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -164,6 +164,9 @@ vma_create(struct drm_i915_gem_object *obj,
> >                 } else if (view->type == I915_GGTT_VIEW_ROTATED) {
> >                         vma->size = intel_rotation_info_size(&view->rotated);
> >                         vma->size <<= PAGE_SHIFT;
> > +               } else if (view->type == I915_GGTT_VIEW_REMAPPED) {
> > +                       vma->size = intel_remapped_info_size(&view->remapped);
> > +                       vma->size <<= PAGE_SHIFT;
> >                 }
> >         }
> >  
> > @@ -462,7 +465,8 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
> >          * Explicitly disable for rotated VMA since the display does not
> >          * need the fence and the VMA is not accessible to other users.
> >          */
> > -       if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
> > +       if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED ||
> > +           vma->ggtt_view.type == I915_GGTT_VIEW_REMAPPED)
> >                 return;
> 
> Not fenceable? I suppose, you only want the fence tracking from GGTT
> mmaps. Do we need to make sure FBC finally works on unfenced vma? I
> guess we do.

Might be a nice bonus. I keep putting off touching FBC in a significant
way though because I know it won't stop with the first patch.

> 
> >         fenceable = (vma->node.size >= vma->fence_size &&
> > diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> > index f06d66377107..19bb3115226c 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.h
> > +++ b/drivers/gpu/drm/i915/i915_vma.h
> > @@ -257,8 +257,11 @@ i915_vma_compare(struct i915_vma *vma,
> >          */
> >         BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
> >         BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
> > +       BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
> >         BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
> > -                    offsetof(typeof(*view), partial));
> > +                    offsetof(typeof(*view), partial) ||
> > +                    offsetof(typeof(*view), rotated) !=
> > +                    offsetof(typeof(*view), remapped));
> 
> Two separate BUILD_BUG_ON() I think, one pair of offsetof() will be
> harder to interpret should if fail; two pairs never!

ack
Chris Wilson July 19, 2018, 7:46 p.m. UTC | #3
Quoting Ville Syrjälä (2018-07-19 20:33:57)
> On Thu, Jul 19, 2018 at 07:59:33PM +0100, Chris Wilson wrote:
> > Quoting Ville Syrjala (2018-07-19 19:22:11)
> > > +static struct scatterlist *
> > > +remap_pages(const dma_addr_t *in, unsigned int offset,
> > > +           unsigned int width, unsigned int height,
> > > +           unsigned int stride,
> > > +           struct sg_table *st, struct scatterlist *sg)
> > > +{
> > > +       unsigned int column, row;
> > > +
> > > +       for (row = 0; row < height; row++) {
> > > +               for (column = 0; column < width; column++) {
> > > +                       st->nents++;
> > > +                       /* We don't need the pages, but need to initialize
> > > +                        * the entries so the sg list can be happily traversed.
> > > +                        * The only thing we need are DMA addresses.
> > > +                        */
> > > +                       sg_set_page(sg, NULL, PAGE_SIZE, 0);
> > > +                       sg_dma_address(sg) = in[offset + column];
> > > +                       sg_dma_len(sg) = PAGE_SIZE;
> > > +                       sg = sg_next(sg);
> > 
> > Ok. But should be I915_GTT_PAGE_SIZE?
> 
> I suppose. And now I wonder what would happen on gen2 with its
> 2KiB gtt pages. Probably nothing good.

Pffifle. We call it 4KiB. It's just about the semantics, and here we
should be splitting the dma addresses by GTT_PAGE_SIZE rather than
system page size.

> > > +struct intel_remapped_info {
> > > +       struct intel_remapped_plane_info {
> > > +               /* tiles */
> > > +               unsigned int width, height, stride, offset;
> > > +       } plane[2];
> > > +       unsigned int unused;
> > 
> > Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
> > it actually is better if it doesn't exist if it isn't used, then it
> > should be zero.. Hmm, not sure if that's defined at all, might have to
> > say memset and don't rely on {} zeroing?
> > 
> > Should work fine as a memcmp key for the rbtree.
> 
> This whole thing is a bit questionale the way I did it. When populating
> the gtt_view I just poke at view->rotated and rely on matching layout
> for view->remapped. To make it less magic maybe I should embed one
> inside the other?

Hmm. If it's intentionally the same layout, then we should just use the
same struct for both. remapped/remap_info is generic enough to cover
rotation as well.

> > > +} __packed;
> > > +
> > > +static inline void assert_intel_remapped_info_is_packed(void)
> > > +{
> > > +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> > > +}
> > > +
> > >  struct intel_rotation_info {
> > >         struct intel_rotation_plane_info {
> > >                 /* tiles */
> > > @@ -186,6 +199,7 @@ enum i915_ggtt_view_type {
> > >         I915_GGTT_VIEW_NORMAL = 0,
> > >         I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
> > >         I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
> > > +       I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),

Oh, forgot about that trick. Yeah, they do need to differ in structs.

Hmm, so I think keep the remap_plane_info and reuse that?

struct intel_remapped_info {
       struct intel_remapped_plane_info {
               /* tiles */
               unsigned int width, height, stride, offset;
       } plane[2];
       int unused_mbz; 
};


struct intel_rotation_info {
	struct intel_remmaped_plane_info plane[2];
};

static inline void assert_intel_rotation_matches_remapped_info(void)
{
	/* Check that rotation/remapped shares offsets for simplicity */
	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
	             offsetof(struct intel_rotation_info, plane[0]));
	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
	             offsetofend(struct intel_rotation_info, plane[1]));
}
-Chris
Ville Syrjälä July 19, 2018, 7:55 p.m. UTC | #4
On Thu, Jul 19, 2018 at 08:46:54PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjälä (2018-07-19 20:33:57)
> > On Thu, Jul 19, 2018 at 07:59:33PM +0100, Chris Wilson wrote:
> > > Quoting Ville Syrjala (2018-07-19 19:22:11)
> > > > +static struct scatterlist *
> > > > +remap_pages(const dma_addr_t *in, unsigned int offset,
> > > > +           unsigned int width, unsigned int height,
> > > > +           unsigned int stride,
> > > > +           struct sg_table *st, struct scatterlist *sg)
> > > > +{
> > > > +       unsigned int column, row;
> > > > +
> > > > +       for (row = 0; row < height; row++) {
> > > > +               for (column = 0; column < width; column++) {
> > > > +                       st->nents++;
> > > > +                       /* We don't need the pages, but need to initialize
> > > > +                        * the entries so the sg list can be happily traversed.
> > > > +                        * The only thing we need are DMA addresses.
> > > > +                        */
> > > > +                       sg_set_page(sg, NULL, PAGE_SIZE, 0);
> > > > +                       sg_dma_address(sg) = in[offset + column];
> > > > +                       sg_dma_len(sg) = PAGE_SIZE;
> > > > +                       sg = sg_next(sg);
> > > 
> > > Ok. But should be I915_GTT_PAGE_SIZE?
> > 
> > I suppose. And now I wonder what would happen on gen2 with its
> > 2KiB gtt pages. Probably nothing good.
> 
> Pffifle. We call it 4KiB. It's just about the semantics, and here we
> should be splitting the dma addresses by GTT_PAGE_SIZE rather than
> system page size.
> 
> > > > +struct intel_remapped_info {
> > > > +       struct intel_remapped_plane_info {
> > > > +               /* tiles */
> > > > +               unsigned int width, height, stride, offset;
> > > > +       } plane[2];
> > > > +       unsigned int unused;
> > > 
> > > Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
> > > it actually is better if it doesn't exist if it isn't used, then it
> > > should be zero.. Hmm, not sure if that's defined at all, might have to
> > > say memset and don't rely on {} zeroing?
> > > 
> > > Should work fine as a memcmp key for the rbtree.
> > 
> > This whole thing is a bit questionale the way I did it. When populating
> > the gtt_view I just poke at view->rotated and rely on matching layout
> > for view->remapped. To make it less magic maybe I should embed one
> > inside the other?
> 
> Hmm. If it's intentionally the same layout, then we should just use the
> same struct for both. remapped/remap_info is generic enough to cover
> rotation as well.
> 
> > > > +} __packed;
> > > > +
> > > > +static inline void assert_intel_remapped_info_is_packed(void)
> > > > +{
> > > > +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> > > > +}
> > > > +
> > > >  struct intel_rotation_info {
> > > >         struct intel_rotation_plane_info {
> > > >                 /* tiles */
> > > > @@ -186,6 +199,7 @@ enum i915_ggtt_view_type {
> > > >         I915_GGTT_VIEW_NORMAL = 0,
> > > >         I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
> > > >         I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
> > > > +       I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
> 
> Oh, forgot about that trick. Yeah, they do need to differ in structs.
> 
> Hmm, so I think keep the remap_plane_info and reuse that?
> 
> struct intel_remapped_info {
>        struct intel_remapped_plane_info {
>                /* tiles */
>                unsigned int width, height, stride, offset;
>        } plane[2];
>        int unused_mbz; 
> };
> 
> 
> struct intel_rotation_info {
> 	struct intel_remmaped_plane_info plane[2];
> };
> 
> static inline void assert_intel_rotation_matches_remapped_info(void)
> {
> 	/* Check that rotation/remapped shares offsets for simplicity */
> 	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
> 	             offsetof(struct intel_rotation_info, plane[0]));
> 	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
> 	             offsetofend(struct intel_rotation_info, plane[1]));
> }

Yeah, something like that could work.
Ville Syrjälä July 19, 2018, 8:16 p.m. UTC | #5
On Thu, Jul 19, 2018 at 08:46:54PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjälä (2018-07-19 20:33:57)
> > On Thu, Jul 19, 2018 at 07:59:33PM +0100, Chris Wilson wrote:
> > > Quoting Ville Syrjala (2018-07-19 19:22:11)
> > > > +static struct scatterlist *
> > > > +remap_pages(const dma_addr_t *in, unsigned int offset,
> > > > +           unsigned int width, unsigned int height,
> > > > +           unsigned int stride,
> > > > +           struct sg_table *st, struct scatterlist *sg)
> > > > +{
> > > > +       unsigned int column, row;
> > > > +
> > > > +       for (row = 0; row < height; row++) {
> > > > +               for (column = 0; column < width; column++) {
> > > > +                       st->nents++;
> > > > +                       /* We don't need the pages, but need to initialize
> > > > +                        * the entries so the sg list can be happily traversed.
> > > > +                        * The only thing we need are DMA addresses.
> > > > +                        */
> > > > +                       sg_set_page(sg, NULL, PAGE_SIZE, 0);
> > > > +                       sg_dma_address(sg) = in[offset + column];
> > > > +                       sg_dma_len(sg) = PAGE_SIZE;
> > > > +                       sg = sg_next(sg);
> > > 
> > > Ok. But should be I915_GTT_PAGE_SIZE?
> > 
> > I suppose. And now I wonder what would happen on gen2 with its
> > 2KiB gtt pages. Probably nothing good.
> 
> Pffifle. We call it 4KiB. It's just about the semantics, and here we
> should be splitting the dma addresses by GTT_PAGE_SIZE rather than
> system page size.
> 
> > > > +struct intel_remapped_info {
> > > > +       struct intel_remapped_plane_info {
> > > > +               /* tiles */
> > > > +               unsigned int width, height, stride, offset;
> > > > +       } plane[2];
> > > > +       unsigned int unused;
> > > 
> > > Tag it as mbz, since we do use it inside the compare. Hmm, I wonder if
> > > it actually is better if it doesn't exist if it isn't used, then it
> > > should be zero.. Hmm, not sure if that's defined at all, might have to
> > > say memset and don't rely on {} zeroing?
> > > 
> > > Should work fine as a memcmp key for the rbtree.
> > 
> > This whole thing is a bit questionale the way I did it. When populating
> > the gtt_view I just poke at view->rotated and rely on matching layout
> > for view->remapped. To make it less magic maybe I should embed one
> > inside the other?
> 
> Hmm. If it's intentionally the same layout, then we should just use the
> same struct for both. remapped/remap_info is generic enough to cover
> rotation as well.
> 
> > > > +} __packed;
> > > > +
> > > > +static inline void assert_intel_remapped_info_is_packed(void)
> > > > +{
> > > > +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));

Hmm. These assert inlines don't seem to be doing their job. Clearly
my struct size was 9 ints not 10.
Chris Wilson July 19, 2018, 8:25 p.m. UTC | #6
Quoting Ville Syrjälä (2018-07-19 21:16:20)
> > > > > +} __packed;
> > > > > +
> > > > > +static inline void assert_intel_remapped_info_is_packed(void)
> > > > > +{
> > > > > +       BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
> 
> Hmm. These assert inlines don't seem to be doing their job. Clearly
> my struct size was 9 ints not 10.

gcc is getting overly smart. Pull all the asserts into one
assert_i915_gem_gtt_types() and then call that from i915_vma_compare().
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b3aefd623557..82b2984eabee 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -196,6 +196,18 @@  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 					   vma->ggtt_view.rotated.plane[1].offset);
 				break;
 
+			case I915_GGTT_VIEW_REMAPPED:
+				seq_printf(m, ", remapped [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]",
+					   vma->ggtt_view.remapped.plane[0].width,
+					   vma->ggtt_view.remapped.plane[0].height,
+					   vma->ggtt_view.remapped.plane[0].stride,
+					   vma->ggtt_view.remapped.plane[0].offset,
+					   vma->ggtt_view.remapped.plane[1].width,
+					   vma->ggtt_view.remapped.plane[1].height,
+					   vma->ggtt_view.remapped.plane[1].stride,
+					   vma->ggtt_view.remapped.plane[1].offset);
+				break;
+
 			default:
 				MISSING_CASE(vma->ggtt_view.type);
 				break;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f00c7fbef79e..29b16f564225 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3800,6 +3800,92 @@  intel_rotate_pages(struct intel_rotation_info *rot_info,
 	return ERR_PTR(ret);
 }
 
+static struct scatterlist *
+remap_pages(const dma_addr_t *in, unsigned int offset,
+	    unsigned int width, unsigned int height,
+	    unsigned int stride,
+	    struct sg_table *st, struct scatterlist *sg)
+{
+	unsigned int column, row;
+
+	for (row = 0; row < height; row++) {
+		for (column = 0; column < width; column++) {
+			st->nents++;
+			/* We don't need the pages, but need to initialize
+			 * the entries so the sg list can be happily traversed.
+			 * The only thing we need are DMA addresses.
+			 */
+			sg_set_page(sg, NULL, PAGE_SIZE, 0);
+			sg_dma_address(sg) = in[offset + column];
+			sg_dma_len(sg) = PAGE_SIZE;
+			sg = sg_next(sg);
+		}
+		offset += stride;
+	}
+
+	return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+		  struct drm_i915_gem_object *obj)
+{
+	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
+	unsigned int size = intel_remapped_info_size(rem_info);
+	struct sgt_iter sgt_iter;
+	dma_addr_t dma_addr;
+	unsigned long i;
+	dma_addr_t *page_addr_list;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int ret = -ENOMEM;
+
+	/* Allocate a temporary list of source pages for random access. */
+	page_addr_list = kvmalloc_array(n_pages,
+					sizeof(dma_addr_t),
+					GFP_KERNEL);
+	if (!page_addr_list)
+		return ERR_PTR(ret);
+
+	/* Allocate target SG list. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	ret = sg_alloc_table(st, size, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	/* Populate source page list from the object. */
+	i = 0;
+	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
+		page_addr_list[i++] = dma_addr;
+
+	GEM_BUG_ON(i != n_pages);
+	st->nents = 0;
+	sg = st->sgl;
+
+	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+		sg = remap_pages(page_addr_list, rem_info->plane[i].offset,
+				 rem_info->plane[i].width, rem_info->plane[i].height,
+				 rem_info->plane[i].stride, st, sg);
+	}
+
+	kvfree(page_addr_list);
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+	kvfree(page_addr_list);
+
+	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+	return ERR_PTR(ret);
+}
+
 static noinline struct sg_table *
 intel_partial_pages(const struct i915_ggtt_view *view,
 		    struct drm_i915_gem_object *obj)
@@ -3876,6 +3962,11 @@  i915_get_ggtt_vma_pages(struct i915_vma *vma)
 			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
 		break;
 
+	case I915_GGTT_VIEW_REMAPPED:
+		vma->pages =
+			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+		break;
+
 	case I915_GGTT_VIEW_PARTIAL:
 		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 2a116a91420b..e15ac9f64c36 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -160,6 +160,19 @@  typedef u64 gen8_ppgtt_pml4e_t;
 
 struct sg_table;
 
+struct intel_remapped_info {
+	struct intel_remapped_plane_info {
+		/* tiles */
+		unsigned int width, height, stride, offset;
+	} plane[2];
+	unsigned int unused;
+} __packed;
+
+static inline void assert_intel_remapped_info_is_packed(void)
+{
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 10*sizeof(unsigned int));
+}
+
 struct intel_rotation_info {
 	struct intel_rotation_plane_info {
 		/* tiles */
@@ -186,6 +199,7 @@  enum i915_ggtt_view_type {
 	I915_GGTT_VIEW_NORMAL = 0,
 	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
 	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
+	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
 };
 
 static inline void assert_i915_ggtt_view_type_is_unique(void)
@@ -197,6 +211,7 @@  static inline void assert_i915_ggtt_view_type_is_unique(void)
 	case I915_GGTT_VIEW_NORMAL:
 	case I915_GGTT_VIEW_PARTIAL:
 	case I915_GGTT_VIEW_ROTATED:
+	case I915_GGTT_VIEW_REMAPPED:
 		/* gcc complains if these are identical cases */
 		break;
 	}
@@ -208,6 +223,7 @@  struct i915_ggtt_view {
 		/* Members need to contain no holes/padding */
 		struct intel_partial_info partial;
 		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
 	};
 };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 11d834f94220..eb54755ec25b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -164,6 +164,9 @@  vma_create(struct drm_i915_gem_object *obj,
 		} else if (view->type == I915_GGTT_VIEW_ROTATED) {
 			vma->size = intel_rotation_info_size(&view->rotated);
 			vma->size <<= PAGE_SHIFT;
+		} else if (view->type == I915_GGTT_VIEW_REMAPPED) {
+			vma->size = intel_remapped_info_size(&view->remapped);
+			vma->size <<= PAGE_SHIFT;
 		}
 	}
 
@@ -462,7 +465,8 @@  void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	 * Explicitly disable for rotated VMA since the display does not
 	 * need the fence and the VMA is not accessible to other users.
 	 */
-	if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
+	if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED ||
+	    vma->ggtt_view.type == I915_GGTT_VIEW_REMAPPED)
 		return;
 
 	fenceable = (vma->node.size >= vma->fence_size &&
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index f06d66377107..19bb3115226c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -257,8 +257,11 @@  i915_vma_compare(struct i915_vma *vma,
 	 */
 	BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
 	BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
+	BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
 	BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
-		     offsetof(typeof(*view), partial));
+		     offsetof(typeof(*view), partial) ||
+		     offsetof(typeof(*view), rotated) !=
+		     offsetof(typeof(*view), remapped));
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b39c941b4791..e6f48fab65de 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2005,6 +2005,17 @@  unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info
 	return size;
 }
 
+unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info)
+{
+	unsigned int size = 0;
+	int i;
+
+	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
+		size += rem_info->plane[i].width * rem_info->plane[i].height;
+
+	return size;
+}
+
 static void
 intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
 			const struct drm_framebuffer *fb,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 91b7fb6a07e1..2be5821fc24f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1492,6 +1492,7 @@  unsigned int intel_fb_xy_to_linear(int x, int y,
 void intel_add_fb_offsets(int *x, int *y,
 			  const struct intel_plane_state *state, int plane);
 unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info);
+unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info);
 bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv);
 void intel_mark_busy(struct drm_i915_private *dev_priv);
 void intel_mark_idle(struct drm_i915_private *dev_priv);