drm/i915: vma/ppgtt lifetime rules

Message ID 1406628485-2415-1-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived

Commit Message

Michel Thierry July 29, 2014, 10:08 a.m. UTC
VMAs should take a reference of the address space they use.

Now, when the fd is closed, it will release the ref that the context was
holding, but it will still be referenced by any vmas that are still
active.

ppgtt_release() should then only be called when the last thing referencing
it releases the ref, and it can just call the base cleanup and free the
ppgtt.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  2 ++
 drivers/gpu/drm/i915/i915_gem.c         |  8 ++++++++
 drivers/gpu/drm/i915/i915_gem_context.c | 23 +++--------------------
 drivers/gpu/drm/i915/i915_gem_gtt.c     |  5 +++++
 4 files changed, 18 insertions(+), 20 deletions(-)
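
For readers skimming the thread: the rule the patch encodes is the usual
kref get/put pairing between a vma and the ppgtt backing its address space.
Below is a minimal sketch of that pairing, using made-up toy_* names rather
than the driver's real structures:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

/* Simplified stand-ins for i915_hw_ppgtt and i915_vma. */
struct toy_ppgtt {
        struct kref ref;
        /* page directories, drm_mm, ... elided */
};

struct toy_vma {
        struct toy_ppgtt *ppgtt;
};

static struct toy_ppgtt *toy_ppgtt_create(void)
{
        struct toy_ppgtt *ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);

        if (!ppgtt)
                return NULL;
        kref_init(&ppgtt->ref);         /* initial ref held by the context */
        return ppgtt;
}

/* Last reference gone: base cleanup would run here, then the ppgtt is freed. */
static void toy_ppgtt_release(struct kref *kref)
{
        struct toy_ppgtt *ppgtt = container_of(kref, struct toy_ppgtt, ref);

        kfree(ppgtt);
}

/* Creating a vma pins the address space it points into. */
static struct toy_vma *toy_vma_create(struct toy_ppgtt *ppgtt)
{
        struct toy_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);

        if (!vma)
                return NULL;
        vma->ppgtt = ppgtt;
        kref_get(&ppgtt->ref);
        return vma;
}

/* Destroying the vma drops that pin; the last put frees the ppgtt. */
static void toy_vma_destroy(struct toy_vma *vma)
{
        kref_put(&vma->ppgtt->ref, toy_ppgtt_release);
        kfree(vma);
}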

Comments

Daniel Vetter July 29, 2014, 11:06 a.m. UTC | #1
On Tue, Jul 29, 2014 at 11:08:05AM +0100, Michel Thierry wrote:
> VMAs should take a reference of the address space they use.
> 
> Now, when the fd is closed, it will release the ref that the context was
> holding, but it will still be referenced by any vmas that are still
> active.
> 
> ppgtt_release() should then only be called when the last thing referencing
> it releases the ref, and it can just call the base cleanup and free the
> ppgtt.
> 
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  2 ++
>  drivers/gpu/drm/i915/i915_gem.c         |  8 ++++++++
>  drivers/gpu/drm/i915/i915_gem_context.c | 23 +++--------------------
>  drivers/gpu/drm/i915/i915_gem_gtt.c     |  5 +++++
>  4 files changed, 18 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2acc03f..a879a93 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2495,7 +2495,9 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
>  
>  /* i915_gem_context.c */
>  #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
> +#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base)
>  int __must_check i915_gem_context_init(struct drm_device *dev);
> +void ppgtt_release(struct kref *kref);
>  void i915_gem_context_fini(struct drm_device *dev);
>  void i915_gem_context_reset(struct drm_device *dev);
>  int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index dcd8d7b..25a32b9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4499,12 +4499,20 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
>  
>  void i915_gem_vma_destroy(struct i915_vma *vma)
>  {
> +	struct i915_address_space *vm = NULL;
> +	struct i915_hw_ppgtt *ppgtt = NULL;
>  	WARN_ON(vma->node.allocated);
>  
>  	/* Keep the vma as a placeholder in the execbuffer reservation lists */
>  	if (!list_empty(&vma->exec_list))
>  		return;
>  
> +	vm = vma->vm;
> +	ppgtt = vm_to_ppgtt(vm);
> +
> +	if (ppgtt)
> +		kref_put(&ppgtt->ref, ppgtt_release);

Hm, this has the risk that we leave unwanted vmas around for a bit longer.
They will get cleaned up though when the final object reference goes
away, so the leak is fairly restricted. And currently we don't even have a
shrinker to just whack out vma objects ...

It's definitely a much neater solution than what I had in mind with moving
vmas to full-blown active tracking like we do for objects. So I'm tempted to
go with it, but I have a bit of a lurking feeling that I'm missing something.

Chris?

Cheers, Daniel

> +
>  	list_del(&vma->vma_link);
>  
>  	kfree(vma);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 5b5af6c..59272f9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -108,30 +108,13 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
>  		return;
>  	}
>  
> -	/*
> -	 * Make sure vmas are unbound before we take down the drm_mm
> -	 *
> -	 * FIXME: Proper refcounting should take care of this, this shouldn't be
> -	 * needed at all.
> -	 */
> -	if (!list_empty(&vm->active_list)) {
> -		struct i915_vma *vma;
> -
> -		list_for_each_entry(vma, &vm->active_list, mm_list)
> -			if (WARN_ON(list_empty(&vma->vma_link) ||
> -				    list_is_singular(&vma->vma_link)))
> -				break;
> -
> -		i915_gem_evict_vm(&ppgtt->base, true);
> -	} else {
> -		i915_gem_retire_requests(dev);
> -		i915_gem_evict_vm(&ppgtt->base, false);
> -	}
> +	/* vmas should already be unbound */
> +	WARN_ON(!list_empty(&vm->active_list));
>  
>  	ppgtt->base.cleanup(&ppgtt->base);
>  }
>  
> -static void ppgtt_release(struct kref *kref)
> +void ppgtt_release(struct kref *kref)
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(kref, struct i915_hw_ppgtt, ref);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1411613..90c3d0f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2159,10 +2159,15 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>  				  struct i915_address_space *vm)
>  {
>  	struct i915_vma *vma;
> +	struct i915_hw_ppgtt *ppgtt = NULL;
>  
>  	vma = i915_gem_obj_to_vma(obj, vm);
>  	if (!vma)
>  		vma = __i915_gem_vma_create(obj, vm);
>  
> +	ppgtt = vm_to_ppgtt(vm);
> +	if (ppgtt)
> +		kref_get(&ppgtt->ref);
> +
>  	return vma;
>  }
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson July 29, 2014, 11:19 a.m. UTC | #2
On Tue, Jul 29, 2014 at 01:06:40PM +0200, Daniel Vetter wrote:
> On Tue, Jul 29, 2014 at 11:08:05AM +0100, Michel Thierry wrote:
> > [snip]
> 
> Hm, this has the risk that we leave unwanted vmas around for a bit longer.
> They will get cleaned up though when the final object reference goes
> away, so the leak is fairly restricted. And currently we don't even have a
> shrinker to just whack out vma objects ...
> 
> It's definitely a much neater solution than what I had in mind with moving
> vmas to full-blown active tracking like we do for objects. So I'm tempted to
> go with it, but I have a bit of a lurking feeling that I'm missing something.
> 
> Chris?

I don't think that only taking the reference whilst the vma is
active would add much extra code or complexity, and being consistent
with the existing active tracking has the advantages you mention.

If we could clean up the vma handling in move_to_inactive that would
remove a major wart all by itself.
-Chris
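
To make Chris' alternative concrete: roughly, the get/put would move from the
vma's create/destroy paths into its active/retire paths, so the vma only pins
the ppgtt while the GPU is actually using it. A rough sketch, reusing the
simplified toy_* structures from the sketch after the commit message
(hypothetical names, not the driver's real move_to_active/retire code):

struct toy_vma_active {
        struct toy_ppgtt *ppgtt;
        unsigned int active;            /* outstanding GPU requests */
};

/* First outstanding request pins the address space. */
static void toy_vma_move_to_active(struct toy_vma_active *vma)
{
        if (vma->active++ == 0)
                kref_get(&vma->ppgtt->ref);
}

/* Last request retiring drops the pin again. */
static void toy_vma_retire(struct toy_vma_active *vma)
{
        if (--vma->active == 0)
                kref_put(&vma->ppgtt->ref, toy_ppgtt_release);
}
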
Ben Widawsky July 29, 2014, 6:32 p.m. UTC | #3
On Tue, Jul 29, 2014 at 11:08:05AM +0100, Michel Thierry wrote:
> [snip]

Seems like a reasonable fix to me, and it's simpler than Chris' request
to ref when active. Logically his makes more sense, but it's not quite
as simple to review. Actually active isn't the right thing to track IMO,
bound is what you want to track. I think if anything, do that one as a
patch on top, it does address the problem mentioned below. I also wonder
given the recent reset lolz how this behaves there; and I'd make sure to
thoroughly test the various reset/hang conditions.

Also, this puts a hard limit on the maximum number of VMAs in an address
space based on sizeof(atomic_t). (unless you do what I said above) I
doubt it's a problem in the near future, but it should be documented
somewhere and I hope kref will explode loudly if you overflow it.

For future safety, I'd recommend dropping vm->ctx at this point. It
shouldn't be hard at all to get rid of (quick grep shows only debugfs).
Ben Widawsky July 29, 2014, 6:44 p.m. UTC | #4
On Tue, Jul 29, 2014 at 11:32:07AM -0700, Ben Widawsky wrote:
> On Tue, Jul 29, 2014 at 11:08:05AM +0100, Michel Thierry wrote:
> > [snip]
> 
> Seems like a reasonable fix to me, and it's simpler than Chris' request
> to ref when active. Logically his makes more sense, but it's not quite
> as simple to review. Actually active isn't the right thing to track IMO,
> bound is what you want to track.

I didn't mean to imply this is anything but a semantic difference with
the driver as it exists today.

> I think if anything, do that one as a
> patch on top, it does address the problem mentioned below. I also wonder
> given the recent reset lolz how this behaves there; and I'd make sure to
> thoroughly test the various reset/hang conditions.
> 
> Also, this puts a hard limit on the maximum number of VMAs in an address
> space based on sizeof(atomic_t). (unless you do what I said above) I
> doubt it's a problem in the near future, but it should be documented
> somewhere and I hope kref will explode loudly if you overflow it.

Also, on second thought, this is a don't care.

> 
> For future safety, I'd recommend dropping vm->ctx at this point. It
> shouldn't be hard at all to get rid of (quick grep shows only debugfs).
Daniel Vetter July 29, 2014, 7:19 p.m. UTC | #5
On Tue, Jul 29, 2014 at 11:44:51AM -0700, Ben Widawsky wrote:
> On Tue, Jul 29, 2014 at 11:32:07AM -0700, Ben Widawsky wrote:
> > On Tue, Jul 29, 2014 at 11:08:05AM +0100, Michel Thierry wrote:
> > > [snip]
> > 
> > Seems like a reasonable fix to me, and it's simpler than Chris' request
> > to ref when active. Logically his makes more sense, but it's not quite
> > as simple to review. Actually active isn't the right thing to track IMO,
> > bound is what you want to track.
> 
> I didn't mean to imply this is anything but a semantic difference with
> the driver as it exists today.

Well, using active would be what we want; with bound we'd have a reference
loop: the ppgtt keeps the vmas bound and the vmas keep the ppgtt around.

But active isn't the right thing either: currently we keep the active
state per-bo, but have per-vma LRUs, so a mix of obj/vma tracking. We've
had lots of discussions about this and imo neither of the possible active
approaches is really worth the fuss of converting to it, now that the code
is in.

Assuming I don't spot any hidden tricky bits after a good night's worth of
sleep (and no one else does either), I'll pull this in tomorrow. So please
keep poking holes ...

> > I think if anything, do that one as a
> > patch on top, it does address the problem mentioned below. I also wonder
> > given the recent reset lolz how this behaves there; and I'd make sure to
> > thoroughly test the various reset/hang conditions.
> > 
> > Also, this puts a hard limit on the maximum number of VMAs in an address
> > space based on sizeof(atomic_t). (unless you do what I said above) I
> > doubt it's a problem in the near future, but it should be documented
> > somewhere and I hope kref will explode loudly if you overflow it.
> 
> Also, on second thought, this is a don't care.

Yeah, the kernel generally assumes that an atomic_t or long is
sufficiently big for a refcount covering all of memory. Maybe giant 32-bit
PAE boxes violated that, but meh.

> > For future safety, I'd recommend dropping vm->ctx at this point. It
> > shouldn't be hard at all to get rid of (quick grep shows only debugfs).

Hm, that's a good point, since that pointer is a loop in the refcounting.
So if we really don't need it then I agree we should ditch it. Michel,
can you please throw that follow-up patch on top, if that's indeed all
there is to it?

Thanks, Daniel
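
For context on the vm->ctx point: the ctx -> vm and vma -> vm edges are the
counted references, while vm->ctx is an uncounted back-pointer that only
debugfs still uses. A simplified illustration of why it is a loop in the
reference graph (made-up names, not the driver's structures):

struct toy_context;

struct toy_address_space {
        struct kref ref;
        struct toy_context *ctx;        /* uncounted back-pointer, debugfs only;
                                         * if this ever took a reference, the
                                         * context and the ppgtt would keep
                                         * each other alive forever */
};

struct toy_context {
        struct toy_address_space *vm;   /* counted: kref_get() at context creation */
};
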
Michel Thierry July 30, 2014, 8:19 a.m. UTC | #6
> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Tuesday, July 29, 2014 8:20 PM
> To: Ben Widawsky
> Cc: Thierry, Michel; intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] drm/i915: vma/ppgtt lifetime rules
> 
> On Tue, Jul 29, 2014 at 11:44:51AM -0700, Ben Widawsky wrote:
> > On Tue, Jul 29, 2014 at 11:32:07AM -0700, Ben Widawsky wrote:
> > > On Tue, Jul 29, 2014 at 11:08:05AM +0100, Michel Thierry wrote:
> [snip]
> 
> > > For future safety, I'd recommend dropping vm->ctx at this point. It
> > > shouldn't be hard at all to get rid of (quick grep shows only debugfs).
> 
> Hm, that's a good point, since that pointer is a loop in the refcounting.
> So if we really don't need it then I agree we should ditch it. Michel,
> can you please throw that follow-up patch on top, if that's indeed all
> there is to it?

Sure, I'll prepare that follow-up patch.

-Michel

> 
> Thanks, Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2acc03f..a879a93 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2495,7 +2495,9 @@  void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
 
 /* i915_gem_context.c */
 #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
+#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base)
 int __must_check i915_gem_context_init(struct drm_device *dev);
+void ppgtt_release(struct kref *kref);
 void i915_gem_context_fini(struct drm_device *dev);
 void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dcd8d7b..25a32b9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4499,12 +4499,20 @@  struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
+	struct i915_address_space *vm = NULL;
+	struct i915_hw_ppgtt *ppgtt = NULL;
 	WARN_ON(vma->node.allocated);
 
 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
 	if (!list_empty(&vma->exec_list))
 		return;
 
+	vm = vma->vm;
+	ppgtt = vm_to_ppgtt(vm);
+
+	if (ppgtt)
+		kref_put(&ppgtt->ref, ppgtt_release);
+
 	list_del(&vma->vma_link);
 
 	kfree(vma);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5b5af6c..59272f9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -108,30 +108,13 @@  static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 		return;
 	}
 
-	/*
-	 * Make sure vmas are unbound before we take down the drm_mm
-	 *
-	 * FIXME: Proper refcounting should take care of this, this shouldn't be
-	 * needed at all.
-	 */
-	if (!list_empty(&vm->active_list)) {
-		struct i915_vma *vma;
-
-		list_for_each_entry(vma, &vm->active_list, mm_list)
-			if (WARN_ON(list_empty(&vma->vma_link) ||
-				    list_is_singular(&vma->vma_link)))
-				break;
-
-		i915_gem_evict_vm(&ppgtt->base, true);
-	} else {
-		i915_gem_retire_requests(dev);
-		i915_gem_evict_vm(&ppgtt->base, false);
-	}
+	/* vmas should already be unbound */
+	WARN_ON(!list_empty(&vm->active_list));
 
 	ppgtt->base.cleanup(&ppgtt->base);
 }
 
-static void ppgtt_release(struct kref *kref)
+void ppgtt_release(struct kref *kref)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(kref, struct i915_hw_ppgtt, ref);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1411613..90c3d0f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2159,10 +2159,15 @@  i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 				  struct i915_address_space *vm)
 {
 	struct i915_vma *vma;
+	struct i915_hw_ppgtt *ppgtt = NULL;
 
 	vma = i915_gem_obj_to_vma(obj, vm);
 	if (!vma)
 		vma = __i915_gem_vma_create(obj, vm);
 
+	ppgtt = vm_to_ppgtt(vm);
+	if (ppgtt)
+		kref_get(&ppgtt->ref);
+
 	return vma;
 }