drm/i915: Double check vma placement upon gaining the vm lock
diff mbox series

Message ID 20191126152616.2748154-1-chris@chris-wilson.co.uk
State New
Headers show
Series
  • drm/i915: Double check vma placement upon gaining the vm lock
Related show

Commit Message

Chris Wilson Nov. 26, 2019, 3:26 p.m. UTC
The current unbind + retry of i915_gem_object_ggtt_pin() allows for
someone else to sneak and claim the vma under a different placement
before the first GGTT bind is complete. Leading to confusion and
complaints all over.

Ideally we would pull the evict + rebind under the same lock, but for
now, simply try again.

Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_vma.c |  6 +++++
 2 files changed, 28 insertions(+), 17 deletions(-)

Comments

Tvrtko Ursulin Nov. 26, 2019, 5:04 p.m. UTC | #1
On 26/11/2019 15:26, Chris Wilson wrote:
> The current unbind + retry of i915_gem_object_ggtt_pin() allows for
> someone else to sneak and claim the vma under a different placement
> before the first GGTT bind is complete. Leading to confusion and
> complaints all over.
> 
> Ideally we would pull the evict + rebind under the same lock, but for
> now, simply try again.
> 
> Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++--------------
>   drivers/gpu/drm/i915/i915_vma.c |  6 +++++
>   2 files changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 61395b03443e..b0878d35ed1d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -938,33 +938,38 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		return vma;
>   
> -	if (i915_vma_misplaced(vma, size, alignment, flags)) {
> -		if (flags & PIN_NONBLOCK) {
> -			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
> -				return ERR_PTR(-ENOSPC);
> -
> -			if (flags & PIN_MAPPABLE &&
> -			    vma->fence_size > ggtt->mappable_end / 2)
> -				return ERR_PTR(-ENOSPC);
> +	do {
> +		if (i915_vma_misplaced(vma, size, alignment, flags)) {
> +			if (flags & PIN_NONBLOCK) {
> +				if (i915_vma_is_pinned(vma) ||
> +				    i915_vma_is_active(vma))
> +					return ERR_PTR(-ENOSPC);
> +
> +				if (flags & PIN_MAPPABLE &&
> +				    vma->fence_size > ggtt->mappable_end / 2)
> +					return ERR_PTR(-ENOSPC);
> +			}
> +
> +			ret = i915_vma_unbind(vma);
> +			if (ret)
> +				return ERR_PTR(ret);
>   		}
>   
> -		ret = i915_vma_unbind(vma);
> -		if (ret)
> -			return ERR_PTR(ret);
> -	}
> +		ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
> +	} while (ret == -EAGAIN); /* retry if we lost our placement */
> +	if (ret)
> +		return ERR_PTR(ret);
>   
>   	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
>   		mutex_lock(&ggtt->vm.mutex);
>   		ret = i915_vma_revoke_fence(vma);
>   		mutex_unlock(&ggtt->vm.mutex);
> -		if (ret)
> +		if (ret) {
> +			i915_vma_unpin(vma);
>   			return ERR_PTR(ret);
> +		}
>   	}
>   
> -	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
>   	return vma;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index e5512f26e20a..f6e661428b02 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   			__i915_vma_set_map_and_fenceable(vma);
>   	}
>   
> +	/* Somebody else managed to gazump our placement? */
> +	if (i915_vma_misplaced(vma, size, alignment, flags)) {
> +		err = -EAGAIN;
> +		goto err_active;
> +	}
> +

What about other callers? They will not be expecting this.

>   	GEM_BUG_ON(!vma->pages);
>   	err = i915_vma_bind(vma,
>   			    vma->obj ? vma->obj->cache_level : 0,
> 

Regards,

Tvrtko
Chris Wilson Nov. 26, 2019, 5:22 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-11-26 17:04:43)
> 
> On 26/11/2019 15:26, Chris Wilson wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > index e5512f26e20a..f6e661428b02 100644
> > --- a/drivers/gpu/drm/i915/i915_vma.c
> > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >                       __i915_vma_set_map_and_fenceable(vma);
> >       }
> >   
> > +     /* Somebody else managed to gazump our placement? */
> > +     if (i915_vma_misplaced(vma, size, alignment, flags)) {
> > +             err = -EAGAIN;
> > +             goto err_active;
> > +     }
> > +
> 
> What about other callers? They will not be expecting this.

The other paths should be quite happy with -EAGAIN from vma_pin, it's
already part of their retry procedure. If not, there's always more duct
tape. Hopefully the replacement is much simpler (stop laughing back
there).
-Chris
Chris Wilson Nov. 26, 2019, 5:25 p.m. UTC | #3
Quoting Chris Wilson (2019-11-26 17:22:23)
> Quoting Tvrtko Ursulin (2019-11-26 17:04:43)
> > 
> > On 26/11/2019 15:26, Chris Wilson wrote:
> > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> > > index e5512f26e20a..f6e661428b02 100644
> > > --- a/drivers/gpu/drm/i915/i915_vma.c
> > > +++ b/drivers/gpu/drm/i915/i915_vma.c
> > > @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> > >                       __i915_vma_set_map_and_fenceable(vma);
> > >       }
> > >   
> > > +     /* Somebody else managed to gazump our placement? */
> > > +     if (i915_vma_misplaced(vma, size, alignment, flags)) {
> > > +             err = -EAGAIN;
> > > +             goto err_active;
> > > +     }
> > > +
> > 
> > What about other callers? They will not be expecting this.
> 
> The other paths should be quite happy with -EAGAIN from vma_pin, it's
> already part of their retry procedure. If not, there's always more duct
> tape. Hopefully the replacement is much simpler (stop laughing back
> there).

The alternative here is to pull in __i915_vma_unbind(), which might be
plausible.
-Chris
Tvrtko Ursulin Nov. 26, 2019, 5:33 p.m. UTC | #4
On 26/11/2019 17:25, Chris Wilson wrote:
> Quoting Chris Wilson (2019-11-26 17:22:23)
>> Quoting Tvrtko Ursulin (2019-11-26 17:04:43)
>>>
>>> On 26/11/2019 15:26, Chris Wilson wrote:
>>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>>> index e5512f26e20a..f6e661428b02 100644
>>>> --- a/drivers/gpu/drm/i915/i915_vma.c
>>>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>>>> @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>>>>                        __i915_vma_set_map_and_fenceable(vma);
>>>>        }
>>>>    
>>>> +     /* Somebody else managed to gazump our placement? */
>>>> +     if (i915_vma_misplaced(vma, size, alignment, flags)) {
>>>> +             err = -EAGAIN;
>>>> +             goto err_active;
>>>> +     }
>>>> +
>>>
>>> What about other callers? They will not be expecting this.
>>
>> The other paths should be quite happy with -EAGAIN from vma_pin, it's
>> already part of their retry procedure. If not, there's always more duct
>> tape. Hopefully the replacement is much simpler (stop laughing back
>> there).
> 
> The alternative here is to pull in __i915_vma_unbind(), which might be
> plausible.

To make unbind and pin atomic? You'd need unlocked vma_pin as well. Or 
some different idea?

Regards,

Tvrtko
Chris Wilson Nov. 26, 2019, 5:50 p.m. UTC | #5
Quoting Tvrtko Ursulin (2019-11-26 17:33:09)
> 
> On 26/11/2019 17:25, Chris Wilson wrote:
> > Quoting Chris Wilson (2019-11-26 17:22:23)
> >> Quoting Tvrtko Ursulin (2019-11-26 17:04:43)
> >>>
> >>> On 26/11/2019 15:26, Chris Wilson wrote:
> >>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> >>>> index e5512f26e20a..f6e661428b02 100644
> >>>> --- a/drivers/gpu/drm/i915/i915_vma.c
> >>>> +++ b/drivers/gpu/drm/i915/i915_vma.c
> >>>> @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >>>>                        __i915_vma_set_map_and_fenceable(vma);
> >>>>        }
> >>>>    
> >>>> +     /* Somebody else managed to gazump our placement? */
> >>>> +     if (i915_vma_misplaced(vma, size, alignment, flags)) {
> >>>> +             err = -EAGAIN;
> >>>> +             goto err_active;
> >>>> +     }
> >>>> +
> >>>
> >>> What about other callers? They will not be expecting this.
> >>
> >> The other paths should be quite happy with -EAGAIN from vma_pin, it's
> >> already part of their retry procedure. If not, there's always more duct
> >> tape. Hopefully the replacement is much simpler (stop laughing back
> >> there).
> > 
> > The alternative here is to pull in __i915_vma_unbind(), which might be
> > plausible.
> 
> To make unbind and pin atomic? You'd need unlocked vma_pin as well. Or 
> some different idea?

Originally I had planned for an unlocked vma_pin, so that we would take
the lock once in i915_gem_object_ggtt_pin() and do the migration there.
Current plan for quick fix is to add

	if (i915_vma_misplaced() {
		err = __i915_vma_bind();
		if (err)
			goto foo;
	}

to i915_vma_pin() and see how many apple carts that upsets.
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 61395b03443e..b0878d35ed1d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -938,33 +938,38 @@  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return vma;
 
-	if (i915_vma_misplaced(vma, size, alignment, flags)) {
-		if (flags & PIN_NONBLOCK) {
-			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
-				return ERR_PTR(-ENOSPC);
-
-			if (flags & PIN_MAPPABLE &&
-			    vma->fence_size > ggtt->mappable_end / 2)
-				return ERR_PTR(-ENOSPC);
+	do {
+		if (i915_vma_misplaced(vma, size, alignment, flags)) {
+			if (flags & PIN_NONBLOCK) {
+				if (i915_vma_is_pinned(vma) ||
+				    i915_vma_is_active(vma))
+					return ERR_PTR(-ENOSPC);
+
+				if (flags & PIN_MAPPABLE &&
+				    vma->fence_size > ggtt->mappable_end / 2)
+					return ERR_PTR(-ENOSPC);
+			}
+
+			ret = i915_vma_unbind(vma);
+			if (ret)
+				return ERR_PTR(ret);
 		}
 
-		ret = i915_vma_unbind(vma);
-		if (ret)
-			return ERR_PTR(ret);
-	}
+		ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+	} while (ret == -EAGAIN); /* retry if we lost our placement */
+	if (ret)
+		return ERR_PTR(ret);
 
 	if (vma->fence && !i915_gem_object_is_tiled(obj)) {
 		mutex_lock(&ggtt->vm.mutex);
 		ret = i915_vma_revoke_fence(vma);
 		mutex_unlock(&ggtt->vm.mutex);
-		if (ret)
+		if (ret) {
+			i915_vma_unpin(vma);
 			return ERR_PTR(ret);
+		}
 	}
 
-	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
-	if (ret)
-		return ERR_PTR(ret);
-
 	return vma;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e5512f26e20a..f6e661428b02 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -905,6 +905,12 @@  int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 			__i915_vma_set_map_and_fenceable(vma);
 	}
 
+	/* Somebody else managed to gazump our placement? */
+	if (i915_vma_misplaced(vma, size, alignment, flags)) {
+		err = -EAGAIN;
+		goto err_active;
+	}
+
 	GEM_BUG_ON(!vma->pages);
 	err = i915_vma_bind(vma,
 			    vma->obj ? vma->obj->cache_level : 0,