diff mbox

[RFC,1/2] drm/i915: Move ioremap_wc tracking onto VMA

Message ID 1460476612-10427-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin April 12, 2016, 3:56 p.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

By tracking the iomapping on the VMA itself, we can share that area
between multiple users. Also by only revoking the iomapping upon
unbinding from the mappable portion of the GGTT, we can keep that iomap
across multiple invocations (e.g. execlists context pinning).

v2:
  * Rebase on nightly;
  * added kerneldoc. (Tvrtko Ursulin)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c     |  2 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h | 38 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_fbdev.c  |  8 +++-----
 4 files changed, 81 insertions(+), 5 deletions(-)

Comments

Chris Wilson April 12, 2016, 4:21 p.m. UTC | #1
On Tue, Apr 12, 2016 at 04:56:51PM +0100, Tvrtko Ursulin wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> By tracking the iomapping on the VMA itself, we can share that area
> between multiple users. Also by only revoking the iomapping upon
> unbinding from the mappable portion of the GGTT, we can keep that iomap
> across multiple invocations (e.g. execlists context pinning).
> 
> v2:
>   * Rebase on nightly;
>   * added kerneldoc. (Tvrtko Ursulin)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c     |  2 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.h | 38 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_fbdev.c  |  8 +++-----
>  4 files changed, 81 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b37ffea8b458..6a485630595e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3393,6 +3393,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
>  		ret = i915_gem_object_put_fence(obj);
>  		if (ret)
>  			return ret;
> +
> +		i915_vma_iounmap(vma);
>  	}
>  
>  	trace_i915_vma_unbind(vma);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c5cb04907525..b2a8a14e8dcb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3626,3 +3626,41 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
>  		return obj->base.size;
>  	}
>  }
> +
> +void *i915_vma_iomap(struct drm_i915_private *dev_priv,
> +		     struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> +
> +	if (WARN_ON(!obj->map_and_fenceable))
> +		return ERR_PTR(-ENODEV);
> +
> +	BUG_ON(!vma->is_ggtt);
> +	BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
> +	BUG_ON((vma->bound & GLOBAL_BIND) == 0);
> +
> +	if (vma->iomap == NULL) {
> +		void *ptr;

We could extract ggtt from the is_ggtt vma->vm that would remove the
dev_priv parameter and make the callers a bit tidier.

static inline struct i915_ggtt *to_ggtt(struct i915_address_space *vm)
{
	BUG_ON(!i915_is_ggtt(vm));
	return container_of(vm, struct i915_ggtt, base);
}

> +
> +		ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
> +				 obj->base.size);
> +		if (ptr == NULL) {
> +			int ret;
> +
> +			/* Too many areas already allocated? */
> +			ret = i915_gem_evict_vm(vma->vm, true);
> +			if (ret)
> +				return ERR_PTR(ret);
> +
> +			ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
> +					 obj->base.size);

No, we really don't want to create a new ioremap for every caller when
we already have one, ggtt->mappable. Hence,
    io-mapping: Specify mapping size for io_mapping_map_wc
being its preceding patch. The difference is huge on Braswell.
-Chris
Tvrtko Ursulin April 13, 2016, 8:45 a.m. UTC | #2
On 12/04/16 17:21, Chris Wilson wrote:
> On Tue, Apr 12, 2016 at 04:56:51PM +0100, Tvrtko Ursulin wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>
>> By tracking the iomapping on the VMA itself, we can share that area
>> between multiple users. Also by only revoking the iomapping upon
>> unbinding from the mappable portion of the GGTT, we can keep that iomap
>> across multiple invocations (e.g. execlists context pinning).
>>
>> v2:
>>    * Rebase on nightly;
>>    * added kerneldoc. (Tvrtko Ursulin)
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem.c     |  2 ++
>>   drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_gtt.h | 38 +++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_fbdev.c  |  8 +++-----
>>   4 files changed, 81 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index b37ffea8b458..6a485630595e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3393,6 +3393,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
>>   		ret = i915_gem_object_put_fence(obj);
>>   		if (ret)
>>   			return ret;
>> +
>> +		i915_vma_iounmap(vma);
>>   	}
>>
>>   	trace_i915_vma_unbind(vma);
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index c5cb04907525..b2a8a14e8dcb 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -3626,3 +3626,41 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
>>   		return obj->base.size;
>>   	}
>>   }
>> +
>> +void *i915_vma_iomap(struct drm_i915_private *dev_priv,
>> +		     struct i915_vma *vma)
>> +{
>> +	struct drm_i915_gem_object *obj = vma->obj;
>> +	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>> +
>> +	if (WARN_ON(!obj->map_and_fenceable))
>> +		return ERR_PTR(-ENODEV);
>> +
>> +	BUG_ON(!vma->is_ggtt);
>> +	BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
>> +	BUG_ON((vma->bound & GLOBAL_BIND) == 0);
>> +
>> +	if (vma->iomap == NULL) {
>> +		void *ptr;
>
> We could extract ggtt from the is_ggtt vma->vm that would remove the
> dev_priv parameter and make the callers a bit tidier.
>
> static inline struct i915_ggtt *to_ggtt(struct i915_address_space *vm)
> {
> 	BUG_ON(!i915_is_ggtt(vm));
> 	return container_of(vm, struct i915_ggtt, base);
> }
>
>> +
>> +		ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
>> +				 obj->base.size);
>> +		if (ptr == NULL) {
>> +			int ret;
>> +
>> +			/* Too many areas already allocated? */
>> +			ret = i915_gem_evict_vm(vma->vm, true);
>> +			if (ret)
>> +				return ERR_PTR(ret);
>> +
>> +			ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
>> +					 obj->base.size);
>
> No, we really don't want to create a new ioremap for every caller when
> we already have one, ggtt->mappable. Hence,
>      io-mapping: Specify mapping size for io_mapping_map_wc
> being its preceding patch. The difference is huge on Braswell.

I was following the principle of least change for this one since it does 
remain the same in that respect as the current code. Goal was to unblock 
the GuC early unpin / execlist no retired queue series and not get into 
the trap of inflating the dependencies too much. I thought to add this 
and default context cleanup but you keep adding things to the pile. :)

I'll have a look at your io_mapping stuff to see how much it would add 
to the series. Remember I said I'll add the stable ctx id patches as 
well. Do we need to come up with a plan here?

I could have two separate series to simplify dependencies a bit:

  1. GuC premature unpin and
  2. execlist no retired queue.

The stack for the first one could look like (not as patches but 
conceptually):

  1. default context cleanup
  2. any io_mapping patches of yours
  3. i915_vma_iomap or WC pin_map as you suggested in the other reply.
  4. previous / pinned context

Execlist no retired queue would then be:

  1. stable ctx id
  2. removal of i915_gem_request_unreference__unlocked
  3. execlist no retired queue

If I did not forget about something.

At any point in any series we could add things like i915_gem_request.c 
or those patches which move around the context init.

Regards,

Tvrtko
Chris Wilson April 13, 2016, 9:04 a.m. UTC | #3
On Wed, Apr 13, 2016 at 09:45:03AM +0100, Tvrtko Ursulin wrote:
> 
> On 12/04/16 17:21, Chris Wilson wrote:
> >On Tue, Apr 12, 2016 at 04:56:51PM +0100, Tvrtko Ursulin wrote:
> >>From: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >>By tracking the iomapping on the VMA itself, we can share that area
> >>between multiple users. Also by only revoking the iomapping upon
> >>unbinding from the mappable portion of the GGTT, we can keep that iomap
> >>across multiple invocations (e.g. execlists context pinning).
> >>
> >>v2:
> >>   * Rebase on nightly;
> >>   * added kerneldoc. (Tvrtko Ursulin)
> >>
> >>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>---
> >>  drivers/gpu/drm/i915/i915_gem.c     |  2 ++
> >>  drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/i915_gem_gtt.h | 38 +++++++++++++++++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/intel_fbdev.c  |  8 +++-----
> >>  4 files changed, 81 insertions(+), 5 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>index b37ffea8b458..6a485630595e 100644
> >>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>@@ -3393,6 +3393,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
> >>  		ret = i915_gem_object_put_fence(obj);
> >>  		if (ret)
> >>  			return ret;
> >>+
> >>+		i915_vma_iounmap(vma);
> >>  	}
> >>
> >>  	trace_i915_vma_unbind(vma);
> >>diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >>index c5cb04907525..b2a8a14e8dcb 100644
> >>--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >>+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >>@@ -3626,3 +3626,41 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
> >>  		return obj->base.size;
> >>  	}
> >>  }
> >>+
> >>+void *i915_vma_iomap(struct drm_i915_private *dev_priv,
> >>+		     struct i915_vma *vma)
> >>+{
> >>+	struct drm_i915_gem_object *obj = vma->obj;
> >>+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> >>+
> >>+	if (WARN_ON(!obj->map_and_fenceable))
> >>+		return ERR_PTR(-ENODEV);
> >>+
> >>+	BUG_ON(!vma->is_ggtt);
> >>+	BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
> >>+	BUG_ON((vma->bound & GLOBAL_BIND) == 0);
> >>+
> >>+	if (vma->iomap == NULL) {
> >>+		void *ptr;
> >
> >We could extract ggtt from the is_ggtt vma->vm that would remove the
> >dev_priv parameter and make the callers a bit tidier.
> >
> >static inline struct i915_ggtt *to_ggtt(struct i915_address_space *vm)
> >{
> >	BUG_ON(!i915_is_ggtt(vm));
> >	return container_of(vm, struct i915_ggtt, base);
> >}
> >
> >>+
> >>+		ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
> >>+				 obj->base.size);
> >>+		if (ptr == NULL) {
> >>+			int ret;
> >>+
> >>+			/* Too many areas already allocated? */
> >>+			ret = i915_gem_evict_vm(vma->vm, true);
> >>+			if (ret)
> >>+				return ERR_PTR(ret);
> >>+
> >>+			ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
> >>+					 obj->base.size);
> >
> >No, we really don't want to create a new ioremap for every caller when
> >we already have one, ggtt->mappable. Hence,
> >     io-mapping: Specify mapping size for io_mapping_map_wc
> >being its preceding patch. The difference is huge on Braswell.
> 
> I was following the principle of least change for this one since it
> does remain the same in that respect as the current code. Goal was
> to unblock the GuC early unpin / execlist no retired queue series
> and not get into the trap of inflating the dependencies too much. I
> thought to add this and default context cleanup but you keep adding
> things to the pile. :)

Tip of the iceberg. Only we have lots of icebergs. And the occasional
dragon.
 
> I'll have a look at your io_mapping stuff to see how much it would
> add to the series. Remember I said I'll add the stable ctx id
> patches as well. Do we need to come up with a plan here?

We more or less own io_mapping, for this it is just one patch to add a
pass-through parameter to ioremap_wc that's required for 32bit.

I could mutter about the patch before this being quite a major
bugfix...

> I could have two separate series to simplify dependencies a bit:
> 
>  1. GuC premature unpin and
>  2. execlist no retired queue.
> 
> The stack for the first one could look like (not as patches but
> conceptually):
> 
>  1. default context cleanup
>  2. any io_mapping patches of yours
>  3. i915_vma_iomap or WC pin_map as you suggested in the other reply.
>  4. previous / pinned context
> 
> Execlist no retired queue would then be:
> 
>  1. stable ctx id
>  2. removal of i915_gem_request_unreference__unlocked
>  3. execlist no retired queue
> 
> If I did not forget about something.
> 
> At any point in any series we could add things like
> i915_gem_request.c or those patches which move around the context
> init.

Could I see you some drm_mm.c patches after that?
-Chris
Tvrtko Ursulin April 13, 2016, 9:14 a.m. UTC | #4
On 13/04/16 10:04, Chris Wilson wrote:
> On Wed, Apr 13, 2016 at 09:45:03AM +0100, Tvrtko Ursulin wrote:
>>
>> On 12/04/16 17:21, Chris Wilson wrote:
>>> On Tue, Apr 12, 2016 at 04:56:51PM +0100, Tvrtko Ursulin wrote:
>>>> From: Chris Wilson <chris@chris-wilson.co.uk>
>>>>
>>>> By tracking the iomapping on the VMA itself, we can share that area
>>>> between multiple users. Also by only revoking the iomapping upon
>>>> unbinding from the mappable portion of the GGTT, we can keep that iomap
>>>> across multiple invocations (e.g. execlists context pinning).
>>>>
>>>> v2:
>>>>    * Rebase on nightly;
>>>>    * added kerneldoc. (Tvrtko Ursulin)
>>>>
>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_gem.c     |  2 ++
>>>>   drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/i915/i915_gem_gtt.h | 38 +++++++++++++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/i915/intel_fbdev.c  |  8 +++-----
>>>>   4 files changed, 81 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>>> index b37ffea8b458..6a485630595e 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>> @@ -3393,6 +3393,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
>>>>   		ret = i915_gem_object_put_fence(obj);
>>>>   		if (ret)
>>>>   			return ret;
>>>> +
>>>> +		i915_vma_iounmap(vma);
>>>>   	}
>>>>
>>>>   	trace_i915_vma_unbind(vma);
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>>>> index c5cb04907525..b2a8a14e8dcb 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>>>> @@ -3626,3 +3626,41 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
>>>>   		return obj->base.size;
>>>>   	}
>>>>   }
>>>> +
>>>> +void *i915_vma_iomap(struct drm_i915_private *dev_priv,
>>>> +		     struct i915_vma *vma)
>>>> +{
>>>> +	struct drm_i915_gem_object *obj = vma->obj;
>>>> +	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>>>> +
>>>> +	if (WARN_ON(!obj->map_and_fenceable))
>>>> +		return ERR_PTR(-ENODEV);
>>>> +
>>>> +	BUG_ON(!vma->is_ggtt);
>>>> +	BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
>>>> +	BUG_ON((vma->bound & GLOBAL_BIND) == 0);
>>>> +
>>>> +	if (vma->iomap == NULL) {
>>>> +		void *ptr;
>>>
>>> We could extract ggtt from the is_ggtt vma->vm that would remove the
>>> dev_priv parameter and make the callers a bit tidier.
>>>
>>> static inline struct i915_ggtt *to_ggtt(struct i915_address_space *vm)
>>> {
>>> 	BUG_ON(!i915_is_ggtt(vm));
>>> 	return container_of(vm, struct i915_ggtt, base);
>>> }
>>>
>>>> +
>>>> +		ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
>>>> +				 obj->base.size);
>>>> +		if (ptr == NULL) {
>>>> +			int ret;
>>>> +
>>>> +			/* Too many areas already allocated? */
>>>> +			ret = i915_gem_evict_vm(vma->vm, true);
>>>> +			if (ret)
>>>> +				return ERR_PTR(ret);
>>>> +
>>>> +			ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
>>>> +					 obj->base.size);
>>>
>>> No, we really don't want to create a new ioremap for every caller when
>>> we already have one, ggtt->mappable. Hence,
>>>      io-mapping: Specify mapping size for io_mapping_map_wc
>>> being its preceding patch. The difference is huge on Braswell.
>>
>> I was following the principle of least change for this one since it
>> does remain the same in that respect as the current code. Goal was
>> to unblock the GuC early unpin / execlist no retired queue series
>> and not get into the trap of inflating the dependencies too much. I
>> thought to add this and default context cleanup but you keep adding
>> things to the pile. :)
>
> Tip of the iceberg. Only we have lots of icebergs. And the occasional
> dragon.
>
>> I'll have a look at your io_mapping stuff to see how much it would
>> add to the series. Remember I said I'll add the stable ctx id
>> patches as well. Do we need to come up with a plan here?
>
> We more or less own io_mapping, for this it is just one patch to add a
> pass-through parameter to ioremap_wc that's required for 32bit.
>
> I could mutter about the patch before this being quite a major
> bugfix...
>
>> I could have two separate series to simplify dependencies a bit:
>>
>>   1. GuC premature unpin and
>>   2. execlist no retired queue.
>>
>> The stack for the first one could look like (not as patches but
>> conceptually):
>>
>>   1. default context cleanup
>>   2. any io_mapping patches of yours
>>   3. i915_vma_iomap or WC pin_map as you suggested in the other reply.
>>   4. previous / pinned context
>>
>> Execlist no retired queue would then be:
>>
>>   1. stable ctx id
>>   2. removal of i915_gem_request_unreference__unlocked
>>   3. execlist no retired queue
>>
>> If I did not forget about something.
>>
>> At any point in any series we could add things like
>> i915_gem_request.c or those patches which move around the context
>> init.
>
> Could I see you some drm_mm.c patches after that?

If I am competent enough to help with them sure.

Does this mean above sounds like a plan to you? Two series containing 
the listed items?

Regards,

Tvrtko
Chris Wilson April 13, 2016, 9:56 a.m. UTC | #5
On Wed, Apr 13, 2016 at 10:14:50AM +0100, Tvrtko Ursulin wrote:
> On 13/04/16 10:04, Chris Wilson wrote:
> >On Wed, Apr 13, 2016 at 09:45:03AM +0100, Tvrtko Ursulin wrote:
> >>I could have two separate series to simplify dependencies a bit:
> >>
> >>  1. GuC premature unpin and
> >>  2. execlist no retired queue.
> >>
> >>The stack for the first one could look like (not as patches but
> >>conceptually):
> >>
> >>  1. default context cleanup
> >>  2. any io_mapping patches of yours
> >>  3. i915_vma_iomap or WC pin_map as you suggested in the other reply.
> >>  4. previous / pinned context
> >>
> >>Execlist no retired queue would then be:
> >>
> >>  1. stable ctx id
> >>  2. removal of i915_gem_request_unreference__unlocked
> >>  3. execlist no retired queue
> >>
> >>If I did not forget about something.
> >>
> >>At any point in any series we could add things like
> >>i915_gem_request.c or those patches which move around the context
> >>init.
> >
> >Could I see you some drm_mm.c patches after that?
> 
> If I am competent enough to help with them sure.

There's a nice tweak in there to help with the aperture thrashing
scenario (speeds up the aperture thrash tests in gem_concurrent_blit by
~60x). You mentioned you were interested in similar scenarios.

Besides which they actually fix the u64 alignment issues and some
residual u32 problems.
 
> Does this mean above sounds like a plan to you? Two series
> containing the listed items?

3. I'll get io_mapping changes ratified, and that can happen in parallel
to these two series.
-Chris
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b37ffea8b458..6a485630595e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3393,6 +3393,8 @@  static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 		ret = i915_gem_object_put_fence(obj);
 		if (ret)
 			return ret;
+
+		i915_vma_iounmap(vma);
 	}
 
 	trace_i915_vma_unbind(vma);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c5cb04907525..b2a8a14e8dcb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3626,3 +3626,41 @@  i915_ggtt_view_size(struct drm_i915_gem_object *obj,
 		return obj->base.size;
 	}
 }
+
+void *i915_vma_iomap(struct drm_i915_private *dev_priv,
+		     struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+
+	if (WARN_ON(!obj->map_and_fenceable))
+		return ERR_PTR(-ENODEV);
+
+	BUG_ON(!vma->is_ggtt);
+	BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
+	BUG_ON((vma->bound & GLOBAL_BIND) == 0);
+
+	if (vma->iomap == NULL) {
+		void *ptr;
+
+		ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
+				 obj->base.size);
+		if (ptr == NULL) {
+			int ret;
+
+			/* Too many areas already allocated? */
+			ret = i915_gem_evict_vm(vma->vm, true);
+			if (ret)
+				return ERR_PTR(ret);
+
+			ptr = ioremap_wc(ggtt->mappable_base + vma->node.start,
+					 obj->base.size);
+			if (ptr == NULL)
+				return ERR_PTR(-ENOMEM);
+		}
+
+		vma->iomap = ptr;
+	}
+
+	return vma->iomap;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d7dd3d8a8758..526fdbc71ace 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -175,6 +175,7 @@  struct i915_vma {
 	struct drm_mm_node node;
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
+	void *iomap;
 
 	/** Flags and address space this VMA is bound to */
 #define GLOBAL_BIND	(1<<0)
@@ -559,4 +560,41 @@  size_t
 i915_ggtt_view_size(struct drm_i915_gem_object *obj,
 		    const struct i915_ggtt_view *view);
 
+/**
+ * i915_vma_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
+ * @dev_priv: i915 private pointer
+ * @vma: VMA to iomap
+ *
+ * The passed in VMA has to be pinned in the global GTT mappable region. Or in
+ * other words callers are responsible for managing the VMA pinned lifetime and
+ * ensuring it covers the use of the returned mapping.
+ *
+ * Callers must hold the struct_mutex.
+ *
+ * Returns a valid iomapped pointer or ERR_PTR.
+ */
+void *i915_vma_iomap(struct drm_i915_private *dev_priv,
+		     struct i915_vma *vma);
+
+/**
+ * i915_vma_iounmap - unmaps the mapping returned from i915_vma_iomap
+ * @dev_priv: i915 private pointer
+ * @vma: VMA to unmap
+ *
+ * Unmaps the previously iomapped VMA using iounmap.
+ *
+ * Users of i915_vma_iomap should not manually unmap by calling this function
+ * if they want to take advantage of the mapping getting cached in the VMA.
+ *
+ * Callers must hold the struct_mutex.
+ */
+static inline void i915_vma_iounmap(struct i915_vma *vma)
+{
+	if (vma->iomap == NULL)
+		return;
+
+	iounmap(vma->iomap);
+	vma->iomap = NULL;
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 79ac202f3870..515a2214827e 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -251,12 +251,10 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	info->fix.smem_start = dev->mode_config.fb_base + i915_gem_obj_ggtt_offset(obj);
 	info->fix.smem_len = size;
 
-	info->screen_base =
-		ioremap_wc(ggtt->mappable_base + i915_gem_obj_ggtt_offset(obj),
-			   size);
-	if (!info->screen_base) {
+	info->screen_base = i915_vma_iomap(dev_priv, i915_gem_obj_to_ggtt(obj));
+	if (IS_ERR(info->screen_base)) {
 		DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
-		ret = -ENOSPC;
+		ret = PTR_ERR(info->screen_base);
 		goto out_destroy_fbi;
 	}
 	info->screen_size = size;