Message ID | 20191126152616.2748154-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Double check vma placement upon gaining the vm lock | expand |
On 26/11/2019 15:26, Chris Wilson wrote: > The current unbind + retry of i915_gem_object_ggtt_pin() allows for > someone else to sneak and claim the vma under a different placement > before the first GGTT bind is complete. Leading to confusion and > complaints all over. > > Ideally we would pull the evict + rebind under the same lock, but for > now, simply try again. > > Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++-------------- > drivers/gpu/drm/i915/i915_vma.c | 6 +++++ > 2 files changed, 28 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 61395b03443e..b0878d35ed1d 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -938,33 +938,38 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > return vma; > > - if (i915_vma_misplaced(vma, size, alignment, flags)) { > - if (flags & PIN_NONBLOCK) { > - if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) > - return ERR_PTR(-ENOSPC); > - > - if (flags & PIN_MAPPABLE && > - vma->fence_size > ggtt->mappable_end / 2) > - return ERR_PTR(-ENOSPC); > + do { > + if (i915_vma_misplaced(vma, size, alignment, flags)) { > + if (flags & PIN_NONBLOCK) { > + if (i915_vma_is_pinned(vma) || > + i915_vma_is_active(vma)) > + return ERR_PTR(-ENOSPC); > + > + if (flags & PIN_MAPPABLE && > + vma->fence_size > ggtt->mappable_end / 2) > + return ERR_PTR(-ENOSPC); > + } > + > + ret = i915_vma_unbind(vma); > + if (ret) > + return ERR_PTR(ret); > } > > - ret = i915_vma_unbind(vma); > - if (ret) > - return ERR_PTR(ret); > - } > + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); > + } while (ret == -EAGAIN); /* retry if we lost our placement */ > + if (ret) > + return ERR_PTR(ret); > > if (vma->fence && !i915_gem_object_is_tiled(obj)) { > mutex_lock(&ggtt->vm.mutex); > ret = i915_vma_revoke_fence(vma); > mutex_unlock(&ggtt->vm.mutex); > - if (ret) > + if (ret) { > + i915_vma_unpin(vma); > return ERR_PTR(ret); > + } > } > > - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); > - if (ret) > - return ERR_PTR(ret); > - > return vma; > } > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index e5512f26e20a..f6e661428b02 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > __i915_vma_set_map_and_fenceable(vma); > } > > + /* Somebody else managed to gazump our placement? */ > + if (i915_vma_misplaced(vma, size, alignment, flags)) { > + err = -EAGAIN; > + goto err_active; > + } > + What about other callers? They will not be expecting this. > GEM_BUG_ON(!vma->pages); > err = i915_vma_bind(vma, > vma->obj ? vma->obj->cache_level : 0, > Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-11-26 17:04:43) > > On 26/11/2019 15:26, Chris Wilson wrote: > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > > index e5512f26e20a..f6e661428b02 100644 > > --- a/drivers/gpu/drm/i915/i915_vma.c > > +++ b/drivers/gpu/drm/i915/i915_vma.c > > @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > > __i915_vma_set_map_and_fenceable(vma); > > } > > > > + /* Somebody else managed to gazump our placement? */ > > + if (i915_vma_misplaced(vma, size, alignment, flags)) { > > + err = -EAGAIN; > > + goto err_active; > > + } > > + > > What about other callers? They will not be expecting this. The other paths should be quite happy with -EAGAIN from vma_pin, it's already part of their retry procedure. If not, there's always more duct tape. Hopefully the replacement is much simpler (stop laughing back there). -Chris
Quoting Chris Wilson (2019-11-26 17:22:23) > Quoting Tvrtko Ursulin (2019-11-26 17:04:43) > > > > On 26/11/2019 15:26, Chris Wilson wrote: > > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > > > index e5512f26e20a..f6e661428b02 100644 > > > --- a/drivers/gpu/drm/i915/i915_vma.c > > > +++ b/drivers/gpu/drm/i915/i915_vma.c > > > @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > > > __i915_vma_set_map_and_fenceable(vma); > > > } > > > > > > + /* Somebody else managed to gazump our placement? */ > > > + if (i915_vma_misplaced(vma, size, alignment, flags)) { > > > + err = -EAGAIN; > > > + goto err_active; > > > + } > > > + > > > > What about other callers? They will not be expecting this. > > The other paths should be quite happy with -EAGAIN from vma_pin, it's > already part of their retry procedure. If not, there's always more duct > tape. Hopefully the replacement is much simpler (stop laughing back > there). The alternative here is to pull in __i915_vma_unbind(), which might be plausible. -Chris
On 26/11/2019 17:25, Chris Wilson wrote: > Quoting Chris Wilson (2019-11-26 17:22:23) >> Quoting Tvrtko Ursulin (2019-11-26 17:04:43) >>> >>> On 26/11/2019 15:26, Chris Wilson wrote: >>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c >>>> index e5512f26e20a..f6e661428b02 100644 >>>> --- a/drivers/gpu/drm/i915/i915_vma.c >>>> +++ b/drivers/gpu/drm/i915/i915_vma.c >>>> @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) >>>> __i915_vma_set_map_and_fenceable(vma); >>>> } >>>> >>>> + /* Somebody else managed to gazump our placement? */ >>>> + if (i915_vma_misplaced(vma, size, alignment, flags)) { >>>> + err = -EAGAIN; >>>> + goto err_active; >>>> + } >>>> + >>> >>> What about other callers? They will not be expecting this. >> >> The other paths should be quite happy with -EAGAIN from vma_pin, it's >> already part of their retry procedure. If not, there's always more duct >> tape. Hopefully the replacement is much simpler (stop laughing back >> there). > > The alternative here is to pull in __i915_vma_unbind(), which might be > plausible. To make unbind and pin atomic? You'd need unlocked vma_pin as well. Or some different idea? Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-11-26 17:33:09) > > On 26/11/2019 17:25, Chris Wilson wrote: > > Quoting Chris Wilson (2019-11-26 17:22:23) > >> Quoting Tvrtko Ursulin (2019-11-26 17:04:43) > >>> > >>> On 26/11/2019 15:26, Chris Wilson wrote: > >>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > >>>> index e5512f26e20a..f6e661428b02 100644 > >>>> --- a/drivers/gpu/drm/i915/i915_vma.c > >>>> +++ b/drivers/gpu/drm/i915/i915_vma.c > >>>> @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > >>>> __i915_vma_set_map_and_fenceable(vma); > >>>> } > >>>> > >>>> + /* Somebody else managed to gazump our placement? */ > >>>> + if (i915_vma_misplaced(vma, size, alignment, flags)) { > >>>> + err = -EAGAIN; > >>>> + goto err_active; > >>>> + } > >>>> + > >>> > >>> What about other callers? They will not be expecting this. > >> > >> The other paths should be quite happy with -EAGAIN from vma_pin, it's > >> already part of their retry procedure. If not, there's always more duct > >> tape. Hopefully the replacement is much simpler (stop laughing back > >> there). > > > > The alternative here is to pull in __i915_vma_unbind(), which might be > > plausible. > > To make unbind and pin atomic? You'd need unlocked vma_pin as well. Or > some different idea? Originally I had planned for an unlocked vma_pin, so that we would take the lock once in i915_gem_object_ggtt_pin() and do the migration there. Current plan for quick fix is to add if (i915_vma_misplaced() { err = __i915_vma_bind(); if (err) goto foo; } to i915_vma_pin() and see how many apple carts that upsets. -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 61395b03443e..b0878d35ed1d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -938,33 +938,38 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return vma; - if (i915_vma_misplaced(vma, size, alignment, flags)) { - if (flags & PIN_NONBLOCK) { - if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) - return ERR_PTR(-ENOSPC); - - if (flags & PIN_MAPPABLE && - vma->fence_size > ggtt->mappable_end / 2) - return ERR_PTR(-ENOSPC); + do { + if (i915_vma_misplaced(vma, size, alignment, flags)) { + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || + i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); + + if (flags & PIN_MAPPABLE && + vma->fence_size > ggtt->mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + + ret = i915_vma_unbind(vma); + if (ret) + return ERR_PTR(ret); } - ret = i915_vma_unbind(vma); - if (ret) - return ERR_PTR(ret); - } + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + } while (ret == -EAGAIN); /* retry if we lost our placement */ + if (ret) + return ERR_PTR(ret); if (vma->fence && !i915_gem_object_is_tiled(obj)) { mutex_lock(&ggtt->vm.mutex); ret = i915_vma_revoke_fence(vma); mutex_unlock(&ggtt->vm.mutex); - if (ret) + if (ret) { + i915_vma_unpin(vma); return ERR_PTR(ret); + } } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); - if (ret) - return ERR_PTR(ret); - return vma; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e5512f26e20a..f6e661428b02 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -905,6 +905,12 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) __i915_vma_set_map_and_fenceable(vma); } + /* Somebody else managed to gazump our placement? */ + if (i915_vma_misplaced(vma, size, alignment, flags)) { + err = -EAGAIN; + goto err_active; + } + GEM_BUG_ON(!vma->pages); err = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0,
The current unbind + retry of i915_gem_object_ggtt_pin() allows for someone else to sneak and claim the vma under a different placement before the first GGTT bind is complete. Leading to confusion and complaints all over. Ideally we would pull the evict + rebind under the same lock, but for now, simply try again. Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 39 +++++++++++++++++++-------------- drivers/gpu/drm/i915/i915_vma.c | 6 +++++ 2 files changed, 28 insertions(+), 17 deletions(-)