diff mbox

[2/4] drm/i915: Improve dynamic management/eviction of lrc backing objects

Message ID 1444143124-16030-3-git-send-email-nicholas.hoath@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nick Hoath Oct. 6, 2015, 2:52 p.m. UTC
Shovel all context related objects through the active queue and obj
management.

- Added callback in vma_(un)bind to add CPU (un)mapping at same time
  if desired
- Inserted LRC hw context & ringbuf to vma active list

Issue: VIZ-4277
Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  4 ++
 drivers/gpu/drm/i915/i915_gem.c         |  3 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c     |  8 ++++
 drivers/gpu/drm/i915/intel_lrc.c        | 28 +++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
 6 files changed, 79 insertions(+), 38 deletions(-)

Comments

Daniel Vetter Oct. 7, 2015, 4:05 p.m. UTC | #1
On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:
> Shovel all context related objects through the active queue and obj
> management.
> 
> - Added callback in vma_(un)bind to add CPU (un)mapping at same time
>   if desired
> - Inserted LRC hw context & ringbuf to vma active list
> 
> Issue: VIZ-4277
> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  4 ++
>  drivers/gpu/drm/i915/i915_gem.c         |  3 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.c     |  8 ++++
>  drivers/gpu/drm/i915/intel_lrc.c        | 28 +++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++++++++++++++++++---------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
>  6 files changed, 79 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3d217f9..d660ee3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
>  			struct work_struct *work;
>  		} userptr;
>  	};
> +
> +	/** Support for automatic CPU side mapping of object */
> +	int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);

I don't think we need a map hook, that can still be done (if not done so
already) by the callers. Also it's better to rename this to vma_unbind
(and it should be at the vma level I think) since there's other potential
users. So explicit mapping, lazy unmapping for the kmaps we need. That's
the same design we're using for binding objects into gpu address spaces.

Also Chris Wilson has something similar, please align with him on the
precise design of this callback.

Thanks, Daniel

> +	void *mappable;
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fc82171..56e0e00 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3262,6 +3262,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
>  	if (vma->pin_count)
>  		return -EBUSY;
>  
> +	if (obj->mmap)
> +		obj->mmap(obj, true);
> +
>  	BUG_ON(obj->pages == NULL);
>  
>  	if (wait) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 620d57e..786ec4b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3495,6 +3495,14 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>  
>  	vma->bound |= bind_flags;
>  
> +	if (vma->obj->mmap) {
> +		ret = vma->obj->mmap(vma->obj, false);
> +		if (ret) {
> +			i915_vma_unbind(vma);
> +			return ret;
> +		}
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e8f5b6c..b807928 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -723,6 +723,18 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>  
>  	intel_logical_ring_advance(request->ringbuf);
>  
> +	/* Push the hw context on to the active list */
> +	i915_vma_move_to_active(
> +			i915_gem_obj_to_ggtt(
> +				request->ctx->engine[ring->id].state),
> +			request);
> +
> +	/* Push the ringbuf on to the active list */
> +	i915_vma_move_to_active(
> +			i915_gem_obj_to_ggtt(
> +			request->ctx->engine[ring->id].ringbuf->obj),
> +			request);
> +
>  	request->tail = request->ringbuf->tail;
>  
>  	if (intel_ring_stopped(ring))
> @@ -1006,10 +1018,15 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
>  	if (ret)
>  		return ret;
>  
> -	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
> +	ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE,
> +			PIN_MAPPABLE);
>  	if (ret)
>  		goto unpin_ctx_obj;
>  
> +	ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true);
> +	if (ret)
> +		goto unpin_rb_obj;
> +
>  	ctx_obj->dirty = true;
>  
>  	/* Invalidate GuC TLB. */
> @@ -1018,6 +1035,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
>  
>  	return ret;
>  
> +unpin_rb_obj:
> +	i915_gem_object_ggtt_unpin(ringbuf->obj);
>  unpin_ctx_obj:
>  	i915_gem_object_ggtt_unpin(ctx_obj);
>  
> @@ -1052,7 +1071,7 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
>  	if (ctx_obj) {
>  		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
>  		if (--rq->ctx->engine[ring->id].pin_count == 0) {
> -			intel_unpin_ringbuffer_obj(ringbuf);
> +			i915_gem_object_ggtt_unpin(ringbuf->obj);
>  			i915_gem_object_ggtt_unpin(ctx_obj);
>  		}
>  	}
> @@ -2369,7 +2388,7 @@ void intel_lr_context_free(struct intel_context *ctx)
>  			struct intel_engine_cs *ring = ringbuf->ring;
>  
>  			if (ctx == ring->default_context) {
> -				intel_unpin_ringbuffer_obj(ringbuf);
> +				i915_gem_object_ggtt_unpin(ringbuf->obj);
>  				i915_gem_object_ggtt_unpin(ctx_obj);
>  			}
>  			WARN_ON(ctx->engine[ring->id].pin_count);
> @@ -2536,5 +2555,8 @@ void intel_lr_context_reset(struct drm_device *dev,
>  
>  		ringbuf->head = 0;
>  		ringbuf->tail = 0;
> +
> +		i915_gem_object_ggtt_unpin(
> +				ctx->engine[ring->id].state);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c82c74c..79df8ca 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1958,38 +1958,35 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
>  	return 0;
>  }
>  
> -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> +static int intel_mmap_ringbuffer_obj(struct drm_i915_gem_object *obj,
> +		bool unmap)
>  {
> -	iounmap(ringbuf->virtual_start);
> -	ringbuf->virtual_start = NULL;
> -	i915_gem_object_ggtt_unpin(ringbuf->obj);
> -}
> -
> -int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> -				     struct intel_ringbuffer *ringbuf)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct drm_i915_gem_object *obj = ringbuf->obj;
> -	int ret;
> -
> -	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
> -	if (ret)
> -		return ret;
> -
> -	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> -	if (ret) {
> -		i915_gem_object_ggtt_unpin(obj);
> -		return ret;
> -	}
> -
> -	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
> -			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
> -	if (ringbuf->virtual_start == NULL) {
> -		i915_gem_object_ggtt_unpin(obj);
> -		return -EINVAL;
> +	int ret = 0;
> +	struct intel_ringbuffer *ringbuf =
> +	(struct intel_ringbuffer *)obj->mappable;
> +
> +	if (!unmap) {
> +		struct drm_device *dev = ringbuf->ring->dev;
> +		struct drm_i915_private *dev_priv = to_i915(dev);
> +
> +		WARN_ON(ringbuf->virtual_start != NULL);
> +		if (ringbuf->virtual_start == NULL) {
> +			ringbuf->virtual_start = ioremap_wc(
> +					dev_priv->gtt.mappable_base +
> +					i915_gem_obj_ggtt_offset(obj),
> +					ringbuf->size);
> +			if (ringbuf->virtual_start == NULL) {
> +				i915_gem_object_ggtt_unpin(obj);
> +				return -EINVAL;
> +			}
> +		}
> +	} else {
> +		if (!i915_gem_obj_is_pinned(ringbuf->obj)) {
> +			iounmap(ringbuf->virtual_start);
> +			ringbuf->virtual_start = NULL;
> +		}
>  	}
> -
> -	return 0;
> +	return ret;
>  }
>  
>  static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
> @@ -2016,6 +2013,9 @@ static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
>  
>  	ringbuf->obj = obj;
>  
> +	obj->mmap = intel_mmap_ringbuffer_obj;
> +	obj->mappable = ringbuf;
> +
>  	return 0;
>  }
>  
> @@ -2094,7 +2094,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  			goto error;
>  	}
>  
> -	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
> +	ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE, PIN_MAPPABLE);
>  	if (ret) {
>  		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
>  				ring->name, ret);
> @@ -2102,12 +2102,19 @@ static int intel_init_ring_buffer(struct drm_device *dev,
>  		goto error;
>  	}
>  
> +	ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true);
> +	if (ret)
> +		goto error_unpin;
> +
>  	ret = i915_cmd_parser_init_ring(ring);
>  	if (ret)
>  		goto error;
>  
>  	return 0;
>  
> +error_unpin:
> +	i915_gem_object_ggtt_unpin(ringbuf->obj);
> +	intel_destroy_ringbuffer_obj(ringbuf);
>  error:
>  	intel_ringbuffer_free(ringbuf);
>  	ring->buffer = NULL;
> @@ -2126,7 +2133,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>  	intel_stop_ring_buffer(ring);
>  	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>  
> -	intel_unpin_ringbuffer_obj(ring->buffer);
> +	i915_gem_object_ggtt_unpin(ring->buffer->obj);
>  	intel_ringbuffer_free(ring->buffer);
>  	ring->buffer = NULL;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index d99b167..8daaf99 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -421,9 +421,6 @@ intel_write_status_page(struct intel_engine_cs *ring,
>  
>  struct intel_ringbuffer *
>  intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size);
> -int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
> -				     struct intel_ringbuffer *ringbuf);
> -void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
>  void intel_ringbuffer_free(struct intel_ringbuffer *ring);
>  
>  void intel_stop_ring_buffer(struct intel_engine_cs *ring);
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 8, 2015, 1:35 p.m. UTC | #2
On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote:
> On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:
> > Shovel all context related objects through the active queue and obj
> > management.
> > 
> > - Added callback in vma_(un)bind to add CPU (un)mapping at same time
> >   if desired
> > - Inserted LRC hw context & ringbuf to vma active list
> > 
> > Issue: VIZ-4277
> > Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         |  4 ++
> >  drivers/gpu/drm/i915/i915_gem.c         |  3 ++
> >  drivers/gpu/drm/i915/i915_gem_gtt.c     |  8 ++++
> >  drivers/gpu/drm/i915/intel_lrc.c        | 28 +++++++++++--
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++++++++++++++++++---------------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
> >  6 files changed, 79 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 3d217f9..d660ee3 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
> >  			struct work_struct *work;
> >  		} userptr;
> >  	};
> > +
> > +	/** Support for automatic CPU side mapping of object */
> > +	int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
> 
> I don't think we need a map hook, that can still be done (if not done so
> already) by the callers. Also it's better to rename this to vma_unbind
> (and it should be at the vma level I think) since there's other potential
> users. So explicit maping, lazy unmapping for the kmaps we need. That's
> the same design we're using for binding objects into gpu address spaces.
> 
> Also Chris Wilson has something similar, please align with him on the
> precise design of this callback.

We need the unbind hook because of the movement in the first patch (it
is a separate issue, the code should work without it albeit having to
remap the ring/context state more often). The changelog in this patch
simply explains the i915_vma_move_to_active() additions. But to get the
shrink accurate we do need the context unpin on retirement and to do the
pin_count check in i915_vma_unbind() after waiting (rather than before,
as we currently do). However, the eviction code will not inspect the
active contexts objects yet (as it will continue to skip over the
ggtt->pin_count on them). The way I allowed ctx objects to be evicted was
to only keep the ctx->state pinned for the duration of the request
construction.

Note that I think it should be a vma->unbind hook not an object level
one (it is i915_vma_unbind, with only a modicum of object level state
being modified in that function).
-Chris
Nick Hoath Oct. 16, 2015, 2:42 p.m. UTC | #3
On 08/10/2015 14:35, Chris Wilson wrote:
> On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote:
>> On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:
>>> Shovel all context related objects through the active queue and obj
>>> management.
>>>
>>> - Added callback in vma_(un)bind to add CPU (un)mapping at same time
>>>    if desired
>>> - Inserted LRC hw context & ringbuf to vma active list
>>>
>>> Issue: VIZ-4277
>>> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h         |  4 ++
>>>   drivers/gpu/drm/i915/i915_gem.c         |  3 ++
>>>   drivers/gpu/drm/i915/i915_gem_gtt.c     |  8 ++++
>>>   drivers/gpu/drm/i915/intel_lrc.c        | 28 +++++++++++--
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++++++++++++++++++---------------
>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
>>>   6 files changed, 79 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 3d217f9..d660ee3 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
>>>   			struct work_struct *work;
>>>   		} userptr;
>>>   	};
>>> +
>>> +	/** Support for automatic CPU side mapping of object */
>>> +	int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
>>
>> I don't think we need a map hook, that can still be done (if not done so
I disagree - this keeps the interface symmetrical. Searching for the 
do/undo code paths and finding they are in different places, called via 
different routes makes code harder to follow.
>> already) by the callers. Also it's better to rename this to vma_unbind
>> (and it should be at the vma level I think) since there's other potential
Nope - the obj is created first, at a point where the map/unmap function 
can be known. Moving the map/unmap to the vma would mean having a 
callback path to the object just to set up the callback path when the 
vma is created anonymously at some later point.
>> users. So explicit maping, lazy unmapping for the kmaps we need. That's
>> the same design we're using for binding objects into gpu address spaces.
>>
>> Also Chris Wilson has something similar, please align with him on the
>> precise design of this callback.
>
> We need the unbind hook because of the movement in the first patch (it
> is a separate issue, the code should work without it albeit having to
> remap the ring/context state more often). The changelog in this patch
> simply explains the i915_vma_move_to_active() additions. But to get the
> shrink accurate we do need the context unpin on retirement and to do the
> pin_count check in i915_vma_unbind() after waiting (rather than before,
> as we currently do). However, the eviction code will not inspect the
> active contexts objects yet (as it will continue to skip over the
> ggtt->pin_count on them). The way I allowed ctx objects to be evicted was
> to only keep the ctx->state pinned for the duration of the request
> construction.
>
> Note that I think it should be a vma->unbind hook not an object level
> one (it is i915_vma_unbind, without only a modicum of object level state
> being modified in that function).
> -Chris
>
Daniel Vetter Oct. 19, 2015, 9:48 a.m. UTC | #4
On Fri, Oct 16, 2015 at 03:42:53PM +0100, Nick Hoath wrote:
> On 08/10/2015 14:35, Chris Wilson wrote:
> >On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote:
> >>On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:
> >>>Shovel all context related objects through the active queue and obj
> >>>management.
> >>>
> >>>- Added callback in vma_(un)bind to add CPU (un)mapping at same time
> >>>   if desired
> >>>- Inserted LRC hw context & ringbuf to vma active list
> >>>
> >>>Issue: VIZ-4277
> >>>Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
> >>>---
> >>>  drivers/gpu/drm/i915/i915_drv.h         |  4 ++
> >>>  drivers/gpu/drm/i915/i915_gem.c         |  3 ++
> >>>  drivers/gpu/drm/i915/i915_gem_gtt.c     |  8 ++++
> >>>  drivers/gpu/drm/i915/intel_lrc.c        | 28 +++++++++++--
> >>>  drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++++++++++++++++++---------------
> >>>  drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
> >>>  6 files changed, 79 insertions(+), 38 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>index 3d217f9..d660ee3 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
> >>>  			struct work_struct *work;
> >>>  		} userptr;
> >>>  	};
> >>>+
> >>>+	/** Support for automatic CPU side mapping of object */
> >>>+	int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
> >>
> >>I don't think we need a map hook, that can still be done (if not done so
> I disagree - this keeps the interface symmetrical. Searching for the do/undo
> code paths and finding they are in difference places, called via different
> routes makes code harder to follow.
> >>already) by the callers. Also it's better to rename this to vma_unbind
> >>(and it should be at the vma level I think) since there's other potential
> Nope - the obj is created first, at a point where the map/unamp function can
> be known. Moving the map/unmap to the vma would mean having a callback path
> to the object just to set up the callback path when the vma is created
> anonymously at some later point.

One of the plans for this is to also use it for to-be-unpinned
framebuffers (4k buffers are huge ...). And in that case the unmap hook
only, and on the vma is the design we want. And since it also seems to
accomodate all the other users I do think it's the right choice.

Like I said, explicit setup and lazy, implicit cleanup is kinda how a lot
of things in gem work.
-Daniel

> >>users. So explicit maping, lazy unmapping for the kmaps we need. That's
> >>the same design we're using for binding objects into gpu address spaces.
> >>
> >>Also Chris Wilson has something similar, please align with him on the
> >>precise design of this callback.
> >
> >We need the unbind hook because of the movement in the first patch (it
> >is a separate issue, the code should work without it albeit having to
> >remap the ring/context state more often). The changelog in this patch
> >simply explains the i915_vma_move_to_active() additions. But to get the
> >shrink accurate we do need the context unpin on retirement and to do the
> >pin_count check in i915_vma_unbind() after waiting (rather than before,
> >as we currently do). However, the eviction code will not inspect the
> >active contexts objects yet (as it will continue to skip over the
> >ggtt->pin_count on them). The way I allowed ctx objects to be evicted was
> >to only keep the ctx->state pinned for the duration of the request
> >construction.
> >
> >Note that I think it should be a vma->unbind hook not an object level
> >one (it is i915_vma_unbind, without only a modicum of object level state
> >being modified in that function).
> >-Chris
> >
>
Nick Hoath Oct. 19, 2015, 10:54 a.m. UTC | #5
On 19/10/2015 10:48, Daniel Vetter wrote:
> On Fri, Oct 16, 2015 at 03:42:53PM +0100, Nick Hoath wrote:
>> On 08/10/2015 14:35, Chris Wilson wrote:
>>> On Wed, Oct 07, 2015 at 06:05:46PM +0200, Daniel Vetter wrote:
>>>> On Tue, Oct 06, 2015 at 03:52:02PM +0100, Nick Hoath wrote:
>>>>> Shovel all context related objects through the active queue and obj
>>>>> management.
>>>>>
>>>>> - Added callback in vma_(un)bind to add CPU (un)mapping at same time
>>>>>    if desired
>>>>> - Inserted LRC hw context & ringbuf to vma active list
>>>>>
>>>>> Issue: VIZ-4277
>>>>> Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/i915_drv.h         |  4 ++
>>>>>   drivers/gpu/drm/i915/i915_gem.c         |  3 ++
>>>>>   drivers/gpu/drm/i915/i915_gem_gtt.c     |  8 ++++
>>>>>   drivers/gpu/drm/i915/intel_lrc.c        | 28 +++++++++++--
>>>>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 71 ++++++++++++++++++---------------
>>>>>   drivers/gpu/drm/i915/intel_ringbuffer.h |  3 --
>>>>>   6 files changed, 79 insertions(+), 38 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>>> index 3d217f9..d660ee3 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>> @@ -2169,6 +2169,10 @@ struct drm_i915_gem_object {
>>>>>   			struct work_struct *work;
>>>>>   		} userptr;
>>>>>   	};
>>>>> +
>>>>> +	/** Support for automatic CPU side mapping of object */
>>>>> +	int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
>>>>
>>>> I don't think we need a map hook, that can still be done (if not done so
>> I disagree - this keeps the interface symmetrical. Searching for the do/undo
>> code paths and finding they are in difference places, called via different
>> routes makes code harder to follow.
>>>> already) by the callers. Also it's better to rename this to vma_unbind
>>>> (and it should be at the vma level I think) since there's other potential
>> Nope - the obj is created first, at a point where the map/unamp function can
>> be known. Moving the map/unmap to the vma would mean having a callback path
>> to the object just to set up the callback path when the vma is created
>> anonymously at some later point.
>
> One of the plans for this is to also use it for to-be-unpinned
> framebuffers (4k buffers are huge ...). And in that case the unmap hook
> only, and on the vma is the design we want. And since it also seems to
> accomodate all the other users I do think it's the right choice.

I refer you to these words found on the mailing list. They may be familiar:

As a rule of thumb for refactoring and shared infrastructure we use the 
following recipe in drm:
- first driver implements things as straightforward as possible
- 2nd user copypastes
- 3rd one has the duty to figure out whether some refactoring is in order
   or not.

The code as I have written it works best and simplest for my use case. 
If someone else wants to refactor it differently to shoe horn in their 
use case, that's up to them.


>
> Like I said, explicit setup and lazy, implicit cleanup is kinda how a lot
> of things in gem work.

The most dangerous phrase in the language is ‘we’ve always done it this 
way.’ - Grace Hopper

> -Daniel
>
>>>> users. So explicit maping, lazy unmapping for the kmaps we need. That's
>>>> the same design we're using for binding objects into gpu address spaces.
>>>>
>>>> Also Chris Wilson has something similar, please align with him on the
>>>> precise design of this callback.
>>>
>>> We need the unbind hook because of the movement in the first patch (it
>>> is a separate issue, the code should work without it albeit having to
>>> remap the ring/context state more often). The changelog in this patch
>>> simply explains the i915_vma_move_to_active() additions. But to get the
>>> shrink accurate we do need the context unpin on retirement and to do the
>>> pin_count check in i915_vma_unbind() after waiting (rather than before,
>>> as we currently do). However, the eviction code will not inspect the
>>> active contexts objects yet (as it will continue to skip over the
>>> ggtt->pin_count on them). The way I allowed ctx objects to be evicted was
>>> to only keep the ctx->state pinned for the duration of the request
>>> construction.
>>>
>>> Note that I think it should be a vma->unbind hook not an object level
>>> one (it is i915_vma_unbind, without only a modicum of object level state
>>> being modified in that function).
>>> -Chris
>>>
>>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3d217f9..d660ee3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2169,6 +2169,10 @@  struct drm_i915_gem_object {
 			struct work_struct *work;
 		} userptr;
 	};
+
+	/** Support for automatic CPU side mapping of object */
+	int (*mmap)(struct drm_i915_gem_object *obj, bool unmap);
+	void *mappable;
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc82171..56e0e00 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3262,6 +3262,9 @@  static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 	if (vma->pin_count)
 		return -EBUSY;
 
+	if (obj->mmap)
+		obj->mmap(obj, true);
+
 	BUG_ON(obj->pages == NULL);
 
 	if (wait) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 620d57e..786ec4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3495,6 +3495,14 @@  int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 
 	vma->bound |= bind_flags;
 
+	if (vma->obj->mmap) {
+		ret = vma->obj->mmap(vma->obj, false);
+		if (ret) {
+			i915_vma_unbind(vma);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e8f5b6c..b807928 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -723,6 +723,18 @@  intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 
 	intel_logical_ring_advance(request->ringbuf);
 
+	/* Push the hw context on to the active list */
+	i915_vma_move_to_active(
+			i915_gem_obj_to_ggtt(
+				request->ctx->engine[ring->id].state),
+			request);
+
+	/* Push the ringbuf on to the active list */
+	i915_vma_move_to_active(
+			i915_gem_obj_to_ggtt(
+			request->ctx->engine[ring->id].ringbuf->obj),
+			request);
+
 	request->tail = request->ringbuf->tail;
 
 	if (intel_ring_stopped(ring))
@@ -1006,10 +1018,15 @@  static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
+	ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE,
+			PIN_MAPPABLE);
 	if (ret)
 		goto unpin_ctx_obj;
 
+	ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true);
+	if (ret)
+		goto unpin_rb_obj;
+
 	ctx_obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1018,6 +1035,8 @@  static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
 
 	return ret;
 
+unpin_rb_obj:
+	i915_gem_object_ggtt_unpin(ringbuf->obj);
 unpin_ctx_obj:
 	i915_gem_object_ggtt_unpin(ctx_obj);
 
@@ -1052,7 +1071,7 @@  void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
 	if (ctx_obj) {
 		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
 		if (--rq->ctx->engine[ring->id].pin_count == 0) {
-			intel_unpin_ringbuffer_obj(ringbuf);
+			i915_gem_object_ggtt_unpin(ringbuf->obj);
 			i915_gem_object_ggtt_unpin(ctx_obj);
 		}
 	}
@@ -2369,7 +2388,7 @@  void intel_lr_context_free(struct intel_context *ctx)
 			struct intel_engine_cs *ring = ringbuf->ring;
 
 			if (ctx == ring->default_context) {
-				intel_unpin_ringbuffer_obj(ringbuf);
+				i915_gem_object_ggtt_unpin(ringbuf->obj);
 				i915_gem_object_ggtt_unpin(ctx_obj);
 			}
 			WARN_ON(ctx->engine[ring->id].pin_count);
@@ -2536,5 +2555,8 @@  void intel_lr_context_reset(struct drm_device *dev,
 
 		ringbuf->head = 0;
 		ringbuf->tail = 0;
+
+		i915_gem_object_ggtt_unpin(
+				ctx->engine[ring->id].state);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c82c74c..79df8ca 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1958,38 +1958,35 @@  static int init_phys_status_page(struct intel_engine_cs *ring)
 	return 0;
 }
 
-void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+static int intel_mmap_ringbuffer_obj(struct drm_i915_gem_object *obj,
+		bool unmap)
 {
-	iounmap(ringbuf->virtual_start);
-	ringbuf->virtual_start = NULL;
-	i915_gem_object_ggtt_unpin(ringbuf->obj);
-}
-
-int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
-				     struct intel_ringbuffer *ringbuf)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_gem_object *obj = ringbuf->obj;
-	int ret;
-
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(obj);
-		return ret;
-	}
-
-	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
-			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
-		i915_gem_object_ggtt_unpin(obj);
-		return -EINVAL;
+	int ret = 0;
+	struct intel_ringbuffer *ringbuf =
+	(struct intel_ringbuffer *)obj->mappable;
+
+	if (!unmap) {
+		struct drm_device *dev = ringbuf->ring->dev;
+		struct drm_i915_private *dev_priv = to_i915(dev);
+
+		WARN_ON(ringbuf->virtual_start != NULL);
+		if (ringbuf->virtual_start == NULL) {
+			ringbuf->virtual_start = ioremap_wc(
+					dev_priv->gtt.mappable_base +
+					i915_gem_obj_ggtt_offset(obj),
+					ringbuf->size);
+			if (ringbuf->virtual_start == NULL) {
+				i915_gem_object_ggtt_unpin(obj);
+				return -EINVAL;
+			}
+		}
+	} else {
+		if (!i915_gem_obj_is_pinned(ringbuf->obj)) {
+			iounmap(ringbuf->virtual_start);
+			ringbuf->virtual_start = NULL;
+		}
 	}
-
-	return 0;
+	return ret;
 }
 
 static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
@@ -2016,6 +2013,9 @@  static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 
 	ringbuf->obj = obj;
 
+	obj->mmap = intel_mmap_ringbuffer_obj;
+	obj->mappable = ringbuf;
+
 	return 0;
 }
 
@@ -2094,7 +2094,7 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 			goto error;
 	}
 
-	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+	ret = i915_gem_obj_ggtt_pin(ringbuf->obj, PAGE_SIZE, PIN_MAPPABLE);
 	if (ret) {
 		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
 				ring->name, ret);
@@ -2102,12 +2102,19 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 		goto error;
 	}
 
+	ret = i915_gem_object_set_to_gtt_domain(ringbuf->obj, true);
+	if (ret)
+		goto error_unpin;
+
 	ret = i915_cmd_parser_init_ring(ring);
 	if (ret)
 		goto error;
 
 	return 0;
 
+error_unpin:
+	i915_gem_object_ggtt_unpin(ringbuf->obj);
+	intel_destroy_ringbuffer_obj(ringbuf);
 error:
 	intel_ringbuffer_free(ringbuf);
 	ring->buffer = NULL;
@@ -2126,7 +2133,7 @@  void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	intel_stop_ring_buffer(ring);
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
-	intel_unpin_ringbuffer_obj(ring->buffer);
+	i915_gem_object_ggtt_unpin(ring->buffer->obj);
 	intel_ringbuffer_free(ring->buffer);
 	ring->buffer = NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d99b167..8daaf99 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -421,9 +421,6 @@  intel_write_status_page(struct intel_engine_cs *ring,
 
 struct intel_ringbuffer *
 intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size);
-int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
-				     struct intel_ringbuffer *ringbuf);
-void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
 void intel_ringbuffer_free(struct intel_ringbuffer *ring);
 
 void intel_stop_ring_buffer(struct intel_engine_cs *ring);