diff mbox

[RFC,3/5] drm/i915: Infrastructure for supporting different GGTT views per object

Message ID 1414687178-18562-4-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin Oct. 30, 2014, 4:39 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Things like reliable GGTT mappings and mirrored 3d display will need to be
to map the same object twice into the GGTT.

Add a ggtt_view field per VMA and select the page view based on the type
of the view.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h            |  3 +++
 drivers/gpu/drm/i915/i915_gem.c            |  9 ++++---
 drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 38 +++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 24 ++++++++++++++++++-
 6 files changed, 63 insertions(+), 15 deletions(-)

Comments

Chris Wilson Oct. 30, 2014, 4:41 p.m. UTC | #1
On Thu, Oct 30, 2014 at 04:39:36PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Things like reliable GGTT mappings and mirrored 3d display will need to be
> to map the same object twice into the GGTT.

What's a reliable GGTT mapping and how does it differ from today's
notoriously unreliable mappings?
-Chris
Tvrtko Ursulin Oct. 30, 2014, 4:55 p.m. UTC | #2
On 10/30/2014 04:41 PM, Chris Wilson wrote:
> On Thu, Oct 30, 2014 at 04:39:36PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Things like reliable GGTT mappings and mirrored 3d display will need to be
>> to map the same object twice into the GGTT.
>
> What's a reliable GGTT mapping and how does it differ from today's
> notoriously unreliable mappings?

Ask Daniel. :P But you are probably aiming at the short commit message, 
which I justify with the RFC tag. :)

As far as I understand it idea is to allow mapping a subset of an object 
to work around the 256Mb limit of the mappable region. How it was 
described sounded a bit like VM inside a VM but I am more interested in 
the infrastructure than this particular justification.

Regards,

Tvrtko
Daniel Vetter Nov. 3, 2014, 3:58 p.m. UTC | #3
On Thu, Oct 30, 2014 at 04:39:36PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Things like reliable GGTT mappings and mirrored 3d display will need to be
> to map the same object twice into the GGTT.
> 
> Add a ggtt_view field per VMA and select the page view based on the type
> of the view.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

lgtm overall, some comments for polish in the crucial parts below.

> @@ -2189,3 +2191,21 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>  
>  	return vma;
>  }
> +
> +void i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> +			u32 flags)
> +{
> +	struct sg_table *pages;
> +
> +	if (vma->ggtt_view.get_pages)
> +		pages = vma->ggtt_view.get_pages(&vma->ggtt_view, vma->obj);
> +	else
> +		pages = vma->obj->pages;
> +
> +	if (pages && !IS_ERR(pages)) {
> +		vma->bind_vma(vma, pages, cache_level, flags);
> +
> +		if (vma->ggtt_view.put_pages)
> +			vma->ggtt_view.put_pages(&vma->ggtt_view);
> +	}

tbh this looks a bit like overboarding generalism to me ;-) Imo
- drop the ->put_pages callback and just free the sg table if you have
  one.
- drop teh ->get_pages callbacks and replace it with a new
  i915_vma_shuffle_pages or similar you'll call for non-standard ggtt
  views. Two reasons: shuffle is a more accurate name than get since this
  version doesn't grab it's own page references any more, and with
  currently just one internal user for this a vtable is serious overkill.

> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 66bc44b..cbaddda 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -109,6 +109,23 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
>  #define GEN8_PPAT(i, x)			((uint64_t) (x) << ((i) * 8))
>  
> +enum i915_ggtt_view_type {
> +	I915_GGTT_VIEW_NORMAL = 0,
> +};
> +
> +struct i915_ggtt_view {
> +	enum i915_ggtt_view_type type;
> +	unsigned int vma_id;

Imo vma_id and ggtt_view_type are fairly redundant. If you _really_ want
to make this super-generic (i.e. for ppgtt) just call it gtt_view instead
fo ggtt_view. But I wouldn't bother.

Removing vma_id also removes the int, which I'd ask you to replace with an
explicit enum anyway ;-)

> +	struct sg_table *pages;

This seems unused - leftover from a previous patch which kept the sgtable
around?
-Daniel
Tvrtko Ursulin Nov. 3, 2014, 4:34 p.m. UTC | #4
On 11/03/2014 03:58 PM, Daniel Vetter wrote:
> On Thu, Oct 30, 2014 at 04:39:36PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Things like reliable GGTT mappings and mirrored 3d display will need to be
>> to map the same object twice into the GGTT.
>>
>> Add a ggtt_view field per VMA and select the page view based on the type
>> of the view.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> lgtm overall, some comments for polish in the crucial parts below.
>
>> @@ -2189,3 +2191,21 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>>
>>   	return vma;
>>   }
>> +
>> +void i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>> +			u32 flags)
>> +{
>> +	struct sg_table *pages;
>> +
>> +	if (vma->ggtt_view.get_pages)
>> +		pages = vma->ggtt_view.get_pages(&vma->ggtt_view, vma->obj);
>> +	else
>> +		pages = vma->obj->pages;
>> +
>> +	if (pages && !IS_ERR(pages)) {
>> +		vma->bind_vma(vma, pages, cache_level, flags);
>> +
>> +		if (vma->ggtt_view.put_pages)
>> +			vma->ggtt_view.put_pages(&vma->ggtt_view);
>> +	}
>
> tbh this looks a bit like overboarding generalism to me ;-) Imo
> - drop the ->put_pages callback and just free the sg table if you have
>    one.
> - drop teh ->get_pages callbacks and replace it with a new
>    i915_vma_shuffle_pages or similar you'll call for non-standard ggtt
>    views. Two reasons: shuffle is a more accurate name than get since this
>    version doesn't grab it's own page references any more, and with
>    currently just one internal user for this a vtable is serious overkill.

I actually thought I will need semi-persistent view for this in the 
future patch which get_pages()/put_pages() caters for.

More precisely it would be for holding onto created pages during view 
creation across the i915_gem_gtt_prepare_object and i915_vma_bind in 
i915_gem_object_bind_to_vm.

Also, ->put_pages looks much neater to me from i915_gem_vma_destroy 
rather than leaking the knowledge of internal implementation. That is of 
course if you will allow the above justification for making the 
alternative view at least semi-persistent.

As additional bonus it has the same semantics to GEM get/put_pages.

>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> index 66bc44b..cbaddda 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> @@ -109,6 +109,23 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>>   #define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
>>   #define GEN8_PPAT(i, x)			((uint64_t) (x) << ((i) * 8))
>>
>> +enum i915_ggtt_view_type {
>> +	I915_GGTT_VIEW_NORMAL = 0,
>> +};
>> +
>> +struct i915_ggtt_view {
>> +	enum i915_ggtt_view_type type;
>> +	unsigned int vma_id;
>
> Imo vma_id and ggtt_view_type are fairly redundant. If you _really_ want
> to make this super-generic (i.e. for ppgtt) just call it gtt_view instead
> fo ggtt_view. But I wouldn't bother.

So you suggest to imply VMA id to be equal to GGTT view type and 
VMA_ID_DEFAULT to be fixed to I915_GGTT_VIEW_NORMAL? Is it not a 
logistics/maintenance problem to sync the two then?

> Removing vma_id also removes the int, which I'd ask you to replace with an
> explicit enum anyway ;-)

Yes I left that for later. I mean, when at stage to be talking about 
such detail it is a happy place.

>> +	struct sg_table *pages;
>
> This seems unused - leftover from a previous patch which kept the sgtable
> around?

Yes, survived refactoring somehow, shouldn't be here.

Regards,

Tvrtko
Daniel Vetter Nov. 3, 2014, 4:52 p.m. UTC | #5
On Mon, Nov 03, 2014 at 04:34:29PM +0000, Tvrtko Ursulin wrote:
> 
> On 11/03/2014 03:58 PM, Daniel Vetter wrote:
> >On Thu, Oct 30, 2014 at 04:39:36PM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Things like reliable GGTT mappings and mirrored 3d display will need to be
> >>to map the same object twice into the GGTT.
> >>
> >>Add a ggtt_view field per VMA and select the page view based on the type
> >>of the view.
> >>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >
> >lgtm overall, some comments for polish in the crucial parts below.
> >
> >>@@ -2189,3 +2191,21 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
> >>
> >>  	return vma;
> >>  }
> >>+
> >>+void i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> >>+			u32 flags)
> >>+{
> >>+	struct sg_table *pages;
> >>+
> >>+	if (vma->ggtt_view.get_pages)
> >>+		pages = vma->ggtt_view.get_pages(&vma->ggtt_view, vma->obj);
> >>+	else
> >>+		pages = vma->obj->pages;
> >>+
> >>+	if (pages && !IS_ERR(pages)) {
> >>+		vma->bind_vma(vma, pages, cache_level, flags);
> >>+
> >>+		if (vma->ggtt_view.put_pages)
> >>+			vma->ggtt_view.put_pages(&vma->ggtt_view);
> >>+	}
> >
> >tbh this looks a bit like overboarding generalism to me ;-) Imo
> >- drop the ->put_pages callback and just free the sg table if you have
> >   one.
> >- drop teh ->get_pages callbacks and replace it with a new
> >   i915_vma_shuffle_pages or similar you'll call for non-standard ggtt
> >   views. Two reasons: shuffle is a more accurate name than get since this
> >   version doesn't grab it's own page references any more, and with
> >   currently just one internal user for this a vtable is serious overkill.
> 
> I actually thought I will need semi-persistent view for this in the future
> patch which get_pages()/put_pages() caters for.
> 
> More precisely it would be for holding onto created pages during view
> creation across the i915_gem_gtt_prepare_object and i915_vma_bind in
> i915_gem_object_bind_to_vm.
> 
> Also, ->put_pages looks much neater to me from i915_gem_vma_destroy rather
> than leaking the knowledge of internal implementation. That is of course if
> you will allow the above justification for making the alternative view at
> least semi-persistent.

I don't see why the view needs to be semi-persistent. And it certainly
doesn't work by attaching the sg table to the vma, since then the pages
can't survive the vma. And the vma is already cached (i.e. we only ever
throw them away lazily).

And I don't hink caching it longer than the vma is useful since when we
throw away the vma that's usually a much bigger performance disaster,
often involving stalls and chasing down backing storage again. Allocating
a puny sg table and filling it again isn't a lot of work. Especially since
this is only for uncommon cases like scanout buffers or ggtt mmaps thus
far.

> As additional bonus it has the same semantics to GEM get/put_pages.

Well, this isn't the same thing. The _really_ crucial part of
get/put_pages is to grab a reference off the backing storage pages, to
make sure they don't get swapped out.

The comes prepare/finish_gtt, whose job it is to ensure that those pages
are mapped into the VT-d iommu.

Finally there's the job of shuffling the resulting dma_addr_t pages from
prepare_gtt just for the insert_entries/vma_bind functions. You're about
two layers away from get/put_pages and this shuffled sg table here is
really only needed to make insert_entries happy. And not even for the
duration of the entire gtt.
> 
> >>+}
> >>diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>index 66bc44b..cbaddda 100644
> >>--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>@@ -109,6 +109,23 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >>  #define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
> >>  #define GEN8_PPAT(i, x)			((uint64_t) (x) << ((i) * 8))
> >>
> >>+enum i915_ggtt_view_type {
> >>+	I915_GGTT_VIEW_NORMAL = 0,
> >>+};
> >>+
> >>+struct i915_ggtt_view {
> >>+	enum i915_ggtt_view_type type;
> >>+	unsigned int vma_id;
> >
> >Imo vma_id and ggtt_view_type are fairly redundant. If you _really_ want
> >to make this super-generic (i.e. for ppgtt) just call it gtt_view instead
> >fo ggtt_view. But I wouldn't bother.
> 
> So you suggest to imply VMA id to be equal to GGTT view type and
> VMA_ID_DEFAULT to be fixed to I915_GGTT_VIEW_NORMAL? Is it not a
> logistics/maintenance problem to sync the two then?

Well if we kill one of them completely there's no syncing required, which
is what I think we want. Having two will be a nightmare, and I don't see
any use a few years out even for non-ggtt special views.

We can always add more complexity later on if needed.

> >Removing vma_id also removes the int, which I'd ask you to replace with an
> >explicit enum anyway ;-)
> 
> Yes I left that for later. I mean, when at stage to be talking about such
> detail it is a happy place.

I'm not a fan of preemptive generalization - it tends to be the wrong one
and hurt doubly in the future since you have to remove the wrong one first
before you can add the right stuff.
-Daniel
Tvrtko Ursulin Nov. 3, 2014, 5:20 p.m. UTC | #6
On 11/03/2014 04:52 PM, Daniel Vetter wrote:
> On Mon, Nov 03, 2014 at 04:34:29PM +0000, Tvrtko Ursulin wrote:
>>
>> On 11/03/2014 03:58 PM, Daniel Vetter wrote:
>>> On Thu, Oct 30, 2014 at 04:39:36PM +0000, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Things like reliable GGTT mappings and mirrored 3d display will need to be
>>>> to map the same object twice into the GGTT.
>>>>
>>>> Add a ggtt_view field per VMA and select the page view based on the type
>>>> of the view.
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>
>>> lgtm overall, some comments for polish in the crucial parts below.
>>>
>>>> @@ -2189,3 +2191,21 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>>>>
>>>>   	return vma;
>>>>   }
>>>> +
>>>> +void i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>>>> +			u32 flags)
>>>> +{
>>>> +	struct sg_table *pages;
>>>> +
>>>> +	if (vma->ggtt_view.get_pages)
>>>> +		pages = vma->ggtt_view.get_pages(&vma->ggtt_view, vma->obj);
>>>> +	else
>>>> +		pages = vma->obj->pages;
>>>> +
>>>> +	if (pages && !IS_ERR(pages)) {
>>>> +		vma->bind_vma(vma, pages, cache_level, flags);
>>>> +
>>>> +		if (vma->ggtt_view.put_pages)
>>>> +			vma->ggtt_view.put_pages(&vma->ggtt_view);
>>>> +	}
>>>
>>> tbh this looks a bit like overboarding generalism to me ;-) Imo
>>> - drop the ->put_pages callback and just free the sg table if you have
>>>    one.
>>> - drop teh ->get_pages callbacks and replace it with a new
>>>    i915_vma_shuffle_pages or similar you'll call for non-standard ggtt
>>>    views. Two reasons: shuffle is a more accurate name than get since this
>>>    version doesn't grab it's own page references any more, and with
>>>    currently just one internal user for this a vtable is serious overkill.
>>
>> I actually thought I will need semi-persistent view for this in the future
>> patch which get_pages()/put_pages() caters for.
>>
>> More precisely it would be for holding onto created pages during view
>> creation across the i915_gem_gtt_prepare_object and i915_vma_bind in
>> i915_gem_object_bind_to_vm.
>>
>> Also, ->put_pages looks much neater to me from i915_gem_vma_destroy rather
>> than leaking the knowledge of internal implementation. That is of course if
>> you will allow the above justification for making the alternative view at
>> least semi-persistent.
>
> I don't see why the view needs to be semi-persistent. And it certainly
> doesn't work by attaching the sg table to the vma, since then the pages
> can't survive the vma. And the vma is already cached (i.e. we only ever
> throw them away lazily).
 >
> And I don't hink caching it longer than the vma is useful since when we
> throw away the vma that's usually a much bigger performance disaster,
> often involving stalls and chasing down backing storage again. Allocating
> a puny sg table and filling it again isn't a lot of work. Especially since
> this is only for uncommon cases like scanout buffers or ggtt mmaps thus
> far.

Ah alright, think I see what you mean. Hm.. to explain once more why I 
put that in...

At the moment it is just the means of allow the alternative GGTT view to 
exists (as in sg_table) for the duration of i915_gem_object_bind_to_vm.

Because it does a two stage process there; the gtt_prepare_object and 
insert entries.

I did not like your idea, well I did not think it is feasible - as in 
easily doable, of stealing the DMA addresses since the SG tables between 
view don't have a 1:1 relationship in number of chunk/pages.

So maybe I am missing something, but to me it looked like a handy way of 
achieving that goal.

You don't like the fact that my put_pages is done only in 
i915_gem_vma_destroy in this patch, which only happens lazily, correct?

If the new get/put_pages calls on the view were done explicitly in 
i915_gem_object_bind_to_vm would that be any better? That would show 
explicitly that the SG table is a throw away.

>> So you suggest to imply VMA id to be equal to GGTT view type and
>> VMA_ID_DEFAULT to be fixed to I915_GGTT_VIEW_NORMAL? Is it not a
>> logistics/maintenance problem to sync the two then?
>
> Well if we kill one of them completely there's no syncing required, which
> is what I think we want. Having two will be a nightmare, and I don't see
> any use a few years out even for non-ggtt special views.
>
> We can always add more complexity later on if needed.

Ok I don't see it at the moment since they are two different concepts to 
me but I'll think about it.

>>> Removing vma_id also removes the int, which I'd ask you to replace with an
>>> explicit enum anyway ;-)
>>
>> Yes I left that for later. I mean, when at stage to be talking about such
>> detail it is a happy place.
>
> I'm not a fan of preemptive generalization - it tends to be the wrong one
> and hurt doubly in the future since you have to remove the wrong one first
> before you can add the right stuff.

In general :), as always it is the question of getting the balance 
right. Because the opposite can also happen and maybe even has in some 
parts of the driver.

Anyway, that's why I didn't bother with polishing the enums etc. :)

Regards,

Tvrtko
Daniel Vetter Nov. 3, 2014, 5:29 p.m. UTC | #7
On Mon, Nov 3, 2014 at 6:20 PM, Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
> I did not like your idea, well I did not think it is feasible - as in easily
> doable, of stealing the DMA addresses since the SG tables between view don't
> have a 1:1 relationship in number of chunk/pages.

Ok, so sg table coalescing is getting in the way. On the source sg
table stored in obj->pages we can fix this by using one of the
per-page sg table walkers - they'll take care of all the annoying
details. See how we walk the sg tables in the pte insert fucntions for
examples.

On the new target sg table I'd simply not bother with merging and
allocate a full sg table with sg_alloc_table. Due to the rotation
there won't be a lot of contiguous stuff anyway.

That leaves the ugly problem of resorting the table without going
nuts. Either allocate a temporary array (allocated with drm_malloc_ab
since kmalloc won't be enough for this) of dma_addr_t and use your
approach. Or just walk the sg_tables row-vise on the rotate layout and
only fill in the relevant holes for an O(rows*num_pages) complexity. I
don't expect anyone to actually notice this little inefficient tbh ;-)

In short, I don't see a problem ;-)
-Daniel
Daniel Vetter Nov. 3, 2014, 5:35 p.m. UTC | #8
On Mon, Nov 3, 2014 at 6:20 PM, Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>> I'm not a fan of preemptive generalization - it tends to be the wrong one
>> and hurt doubly in the future since you have to remove the wrong one first
>> before you can add the right stuff.
>
>
> In general :), as always it is the question of getting the balance right.
> Because the opposite can also happen and maybe even has in some parts of the
> driver.

Well there are ugly corners, but generally just areas where the proper
abstraction wasn't yet clear. Or areas where we've never gotten around
to implement what we discussed we wanted. The few cases where we've
had overgeneralization tended to hurt us badly and actually the
reasons behind some of the cruft in GEM.

> Anyway, that's why I didn't bother with polishing the enums etc. :)

Well I think if we do add some new abstraction it should be polished.
The point of abstraction is to hide details, and that's only possible
if the abstraction is clean&polished and doesn't leak in any
direction. Given that I've mastered in advanced abstract nonsene (i.e.
math) I might have a bit a screwed view here ;-)
-Daniel
Tvrtko Ursulin Nov. 4, 2014, 10:25 a.m. UTC | #9
On 11/03/2014 05:35 PM, Daniel Vetter wrote:
>> Anyway, that's why I didn't bother with polishing the enums etc. :)
>
> Well I think if we do add some new abstraction it should be polished.
> The point of abstraction is to hide details, and that's only possible
> if the abstraction is clean&polished and doesn't leak in any
> direction. Given that I've mastered in advanced abstract nonsene (i.e.
> math) I might have a bit a screwed view here ;-)

Don't think we were on the same page there. I just said I didn't bother 
with polish (int vs enum) _while_ the whole thing is in the design RFC 
stage. Not that it doesn't need to be polished when it's done.

Tvrtko
Tvrtko Ursulin Nov. 4, 2014, 11:15 a.m. UTC | #10
On 11/03/2014 05:29 PM, Daniel Vetter wrote:
> On Mon, Nov 3, 2014 at 6:20 PM, Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>> I did not like your idea, well I did not think it is feasible - as in easily
>> doable, of stealing the DMA addresses since the SG tables between view don't
>> have a 1:1 relationship in number of chunk/pages.
>
> Ok, so sg table coalescing is getting in the way. On the source sg
> table stored in obj->pages we can fix this by using one of the
> per-page sg table walkers - they'll take care of all the annoying
> details. See how we walk the sg tables in the pte insert fucntions for
> examples.

Obviously I am doing the same elsewhere...

> On the new target sg table I'd simply not bother with merging and
> allocate a full sg table with sg_alloc_table. Due to the rotation
> there won't be a lot of contiguous stuff anyway.

For things like mirrored 2d/3d still it could save space. And it kind of 
feels suboptimal to special case and add low level code when there is a 
nice helper functions which handles it all.

> That leaves the ugly problem of resorting the table without going
> nuts. Either allocate a temporary array (allocated with drm_malloc_ab
> since kmalloc won't be enough for this) of dma_addr_t and use your
> approach. Or just walk the sg_tables row-vise on the rotate layout and
> only fill in the relevant holes for an O(rows*num_pages) complexity. I
> don't expect anyone to actually notice this little inefficient tbh ;-)
>
> In short, I don't see a problem ;-)

I just find it a bit ugly and hackish to poke in sg internals only to 
avoid having get_pages/put_pages (Or call them differently if the name 
bothers you the most. get_page_view/put_page_view perhaps?), even if the 
lifetime of that temporary sg_table is limited to VMA creation (as it in 
fact was in my patch).

So I suppose while you see a vtable as a "serious overkill", I saw it as 
a way to avoid messing around in sg internals and reuse existing code. 
Or in other words:

if (ggtt_view->type == NORMAL)
   pages = get_normal();
else if(ggtt_view->type == X1)
   pages = get_x1();
else if(ggtt_view->type == X2)
    pages = get_x2();

etc.. rather than simply:

if (ggtt_view->get_page_view)
   pages = ggtt_view->get_page_view)
else
    pages = obj->pages;

And be done with it. But of course my chances or merging this are slim 
unless I satisfy your taste so please let me know if your stance here is 
still "unshaken".

Also on the question of getting rid of one of VMA id and ggtt_view type, 
how exactly did you envisage that? Simply to imply that the only users 
of multiple VMAs are the GGTT views?

Regards,

Tvrtko
Daniel Vetter Nov. 4, 2014, 11:23 a.m. UTC | #11
On Tue, Nov 04, 2014 at 11:15:16AM +0000, Tvrtko Ursulin wrote:
> 
> On 11/03/2014 05:29 PM, Daniel Vetter wrote:
> >On Mon, Nov 3, 2014 at 6:20 PM, Tvrtko Ursulin
> ><tvrtko.ursulin@linux.intel.com> wrote:
> >>I did not like your idea, well I did not think it is feasible - as in easily
> >>doable, of stealing the DMA addresses since the SG tables between view don't
> >>have a 1:1 relationship in number of chunk/pages.
> >
> >Ok, so sg table coalescing is getting in the way. On the source sg
> >table stored in obj->pages we can fix this by using one of the
> >per-page sg table walkers - they'll take care of all the annoying
> >details. See how we walk the sg tables in the pte insert fucntions for
> >examples.
> 
> Obviously I am doing the same elsewhere...
> 
> >On the new target sg table I'd simply not bother with merging and
> >allocate a full sg table with sg_alloc_table. Due to the rotation
> >there won't be a lot of contiguous stuff anyway.
> 
> For things like mirrored 2d/3d still it could save space. And it kind of
> feels suboptimal to special case and add low level code when there is a nice
> helper functions which handles it all.

If space would be a concern we wouldn't use sg tables. They have a lot of
fluff. Also afaik there's not sg interface to create an sg table from a
list of dma_addr_t, which is what we want.

> >That leaves the ugly problem of resorting the table without going
> >nuts. Either allocate a temporary array (allocated with drm_malloc_ab
> >since kmalloc won't be enough for this) of dma_addr_t and use your
> >approach. Or just walk the sg_tables row-vise on the rotate layout and
> >only fill in the relevant holes for an O(rows*num_pages) complexity. I
> >don't expect anyone to actually notice this little inefficient tbh ;-)
> >
> >In short, I don't see a problem ;-)
> 
> I just find it a bit ugly and hackish to poke in sg internals only to avoid
> having get_pages/put_pages (Or call them differently if the name bothers you
> the most. get_page_view/put_page_view perhaps?), even if the lifetime of
> that temporary sg_table is limited to VMA creation (as it in fact was in my
> patch).

sg tables are just data structures for drivers to map dma memory ranges.
As a driver your supposed to noodle around in them. So maybe I don't
understand your concern?

Wrt naming, get/put imply reference counting, which this doesn't do.
create_page_view/free_page_view I think are better names. Not sure about
the page_view name though.

> So I suppose while you see a vtable as a "serious overkill", I saw it as a
> way to avoid messing around in sg internals and reuse existing code. Or in
> other words:
> 
> if (ggtt_view->type == NORMAL)
>   pages = get_normal();
> else if(ggtt_view->type == X1)
>   pages = get_x1();
> else if(ggtt_view->type == X2)
>    pages = get_x2();
> 
> etc.. rather than simply:
> 
> if (ggtt_view->get_page_view)
>   pages = ggtt_view->get_page_view)
> else
>    pages = obj->pages;
> 
> And be done with it. But of course my chances or merging this are slim
> unless I satisfy your taste so please let me know if your stance here is
> still "unshaken".

Well imo vfuncs are a pain, so we generally add them only when there's
more than 2 implementations. Atm we have one. So if the other two show up
then I'm ok, but before that it just makes chasing callchains more
annoying.

> Also on the question of getting rid of one of VMA id and ggtt_view type, how
> exactly did you envisage that? Simply to imply that the only users of
> multiple VMAs are the GGTT views?

Yeah. We don't have anything else, and I don't expect anything else
really.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a830b85..92dec19 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2518,6 +2518,9 @@  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     struct i915_address_space *vm,
 				     uint32_t alignment,
 				     uint64_t flags);
+
+void i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
+			u32 flags);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f283d8..d7027f9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3487,7 +3487,7 @@  search_free:
 	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 
 	trace_i915_vma_bind(vma, flags);
-	vma->bind_vma(vma, obj->cache_level,
+	i915_vma_bind(vma, obj->cache_level,
 		      flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
 
 	return vma;
@@ -3695,7 +3695,7 @@  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 
 		list_for_each_entry(vma, &obj->vma_list, vma_link)
 			if (drm_mm_node_allocated(&vma->node))
-				vma->bind_vma(vma, cache_level,
+				i915_vma_bind(vma, cache_level,
 						vma->bound & GLOBAL_BIND);
 	}
 
@@ -4093,7 +4093,7 @@  i915_gem_object_pin(struct drm_i915_gem_object *obj,
 	}
 
 	if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND))
-		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
+		i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
 
 	vma->pin_count++;
 	if (flags & PIN_MAPPABLE)
@@ -4511,6 +4511,9 @@  void i915_gem_vma_destroy(struct i915_vma *vma)
 
 	list_del(&vma->vma_link);
 
+	if (vma->ggtt_view.put_pages)
+		vma->ggtt_view.put_pages(&vma->ggtt_view);
+
 	kfree(vma);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5ff6e94..d365348 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -577,7 +577,7 @@  static int do_switch(struct intel_engine_cs *ring,
 
 	vma = i915_gem_obj_to_ggtt(to->legacy_hw_ctx.rcs_state);
 	if (!(vma->bound & GLOBAL_BIND))
-		vma->bind_vma(vma, to->legacy_hw_ctx.rcs_state->cache_level,
+		i915_vma_bind(vma, to->legacy_hw_ctx.rcs_state->cache_level,
 				GLOBAL_BIND);
 
 	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8d56d5b..5ba9440 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -358,7 +358,7 @@  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 	if (unlikely(IS_GEN6(dev) &&
 	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
 	    !(target_vma->bound & GLOBAL_BIND)))
-		target_vma->bind_vma(target_vma, target_i915_obj->cache_level,
+		i915_vma_bind(target_vma, target_i915_obj->cache_level,
 				GLOBAL_BIND);
 
 	/* Validate that the target is in a valid r/w GPU domain */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e4ee2ac..c0e7cd9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -30,6 +30,8 @@ 
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+const struct i915_ggtt_view i915_ggtt_view_normal;
+
 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
 
@@ -71,7 +73,7 @@  static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
 }
 
 
-static void ppgtt_bind_vma(struct i915_vma *vma,
+static void ppgtt_bind_vma(struct i915_vma *vma, struct sg_table *pages,
 			   enum i915_cache_level cache_level,
 			   u32 flags);
 static void ppgtt_unbind_vma(struct i915_vma *vma);
@@ -1194,7 +1196,7 @@  void  i915_ppgtt_release(struct kref *kref)
 }
 
 static void
-ppgtt_bind_vma(struct i915_vma *vma,
+ppgtt_bind_vma(struct i915_vma *vma, struct sg_table *pages,
 	       enum i915_cache_level cache_level,
 	       u32 flags)
 {
@@ -1202,7 +1204,7 @@  ppgtt_bind_vma(struct i915_vma *vma,
 	if (vma->obj->gt_ro)
 		flags |= PTE_READ_ONLY;
 
-	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
+	vma->vm->insert_entries(vma->vm, pages, vma->node.start,
 				cache_level, flags);
 }
 
@@ -1325,7 +1327,7 @@  void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 		 * without telling our object about it. So we need to fake it.
 		 */
 		vma->bound &= ~GLOBAL_BIND;
-		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
+		i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
 	}
 
 
@@ -1511,7 +1513,7 @@  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 }
 
 
-static void i915_ggtt_bind_vma(struct i915_vma *vma,
+static void i915_ggtt_bind_vma(struct i915_vma *vma, struct sg_table *pages,
 			       enum i915_cache_level cache_level,
 			       u32 unused)
 {
@@ -1520,7 +1522,7 @@  static void i915_ggtt_bind_vma(struct i915_vma *vma,
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
 	BUG_ON(!i915_is_ggtt(vma->vm));
-	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
+	intel_gtt_insert_sg_entries(pages, entry, flags);
 	vma->bound = GLOBAL_BIND;
 }
 
@@ -1544,7 +1546,7 @@  static void i915_ggtt_unbind_vma(struct i915_vma *vma)
 	intel_gtt_clear_range(first, size);
 }
 
-static void ggtt_bind_vma(struct i915_vma *vma,
+static void ggtt_bind_vma(struct i915_vma *vma, struct sg_table *pages,
 			  enum i915_cache_level cache_level,
 			  u32 flags)
 {
@@ -1570,7 +1572,7 @@  static void ggtt_bind_vma(struct i915_vma *vma,
 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
 		if (!(vma->bound & GLOBAL_BIND) ||
 		    (cache_level != obj->cache_level)) {
-			vma->vm->insert_entries(vma->vm, obj->pages,
+			vma->vm->insert_entries(vma->vm, pages,
 						vma->node.start,
 						cache_level, flags);
 			vma->bound |= GLOBAL_BIND;
@@ -1582,7 +1584,7 @@  static void ggtt_bind_vma(struct i915_vma *vma,
 	     (cache_level != obj->cache_level))) {
 		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
 		appgtt->base.insert_entries(&appgtt->base,
-					    vma->obj->pages,
+					    pages,
 					    vma->node.start,
 					    cache_level, flags);
 		vma->bound |= LOCAL_BIND;
@@ -2189,3 +2191,21 @@  i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 
 	return vma;
 }
+
+void i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
+			u32 flags)
+{
+	struct sg_table *pages;
+
+	if (vma->ggtt_view.get_pages)
+		pages = vma->ggtt_view.get_pages(&vma->ggtt_view, vma->obj);
+	else
+		pages = vma->obj->pages;
+
+	if (pages && !IS_ERR(pages)) {
+		vma->bind_vma(vma, pages, cache_level, flags);
+
+		if (vma->ggtt_view.put_pages)
+			vma->ggtt_view.put_pages(&vma->ggtt_view);
+	}
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 66bc44b..cbaddda 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -109,6 +109,23 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN8_PPAT_ELLC_OVERRIDE		(0<<2)
 #define GEN8_PPAT(i, x)			((uint64_t) (x) << ((i) * 8))
 
+enum i915_ggtt_view_type {
+	I915_GGTT_VIEW_NORMAL = 0,
+};
+
+struct i915_ggtt_view {
+	enum i915_ggtt_view_type type;
+	unsigned int vma_id;
+	struct sg_table *pages;
+
+	struct sg_table *(*get_pages)(struct i915_ggtt_view *ggtt_view,
+				      struct drm_i915_gem_object *obj);
+
+	void (*put_pages)(struct i915_ggtt_view *ggtt_view);
+};
+
+extern const struct i915_ggtt_view i915_ggtt_view_normal;
+
 enum i915_cache_level;
 /**
  * A VMA represents a GEM BO that is bound into an address space. Therefore, a
@@ -136,6 +153,11 @@  struct i915_vma {
 #define VMA_ID_DEFAULT	(0)
 	unsigned int id;
 
+	/**
+	 * Support different GGTT views into the same object.
+	 */
+	struct i915_ggtt_view ggtt_view;
+
 	/** This object's place on the active/inactive lists */
 	struct list_head mm_list;
 
@@ -168,7 +190,7 @@  struct i915_vma {
 	 * setting the valid PTE entries to a reserved scratch page. */
 	void (*unbind_vma)(struct i915_vma *vma);
 	/* Map an object into an address space with the given cache flags. */
-	void (*bind_vma)(struct i915_vma *vma,
+	void (*bind_vma)(struct i915_vma *vma, struct sg_table *pages,
 			 enum i915_cache_level cache_level,
 			 u32 flags);
 };