| Message ID | 20180711073608.20286-4-chris@chris-wilson.co.uk (mailing list archive) |
|---|---|
| State | New, archived |
On Wed, Jul 11, 2018 at 08:36:04AM +0100, Chris Wilson wrote:
> Introduce a new mutex to guard all of the vma operations within a vm (as
> opposed to the BKL struct_mutex) and start by using it to guard the
> fence operations for a GGTT VMA.

Commit message is a bit confusing, since you've already introduced this
new mutex in an earlier patch. Please change to "Switch from
dev->struct_mutex to ggtt.vm->mutex" or similar ...

For the reviewers' benefit it would also be good to explain how this new
vm.mutex nests with other stuff here (dev->struct_mutex and rpm come to
mind, looking from the patch). Probably better here than in docs since
it's likely going to get outdated.

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  9 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 11 +++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +-
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 68 +++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_vma.c            | 12 ++--
>  drivers/gpu/drm/i915/i915_vma.h            | 23 +++++++-
>  6 files changed, 96 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 75ffed6a3f31..e2ba298a5d88 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -80,7 +80,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
>  
>  static char get_global_flag(struct drm_i915_gem_object *obj)
>  {
> -	return obj->userfault_count ? 'g' : ' ';
> +	return READ_ONCE(obj->userfault_count) ? 'g' : ' ';

The userfault_count here (and below) look like misplaced hunks?

>  }
>  
>  static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
> @@ -914,11 +914,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
>  
>  static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
>  {
> -	struct drm_i915_private *i915 = node_to_i915(m->private);
> -	const struct i915_ggtt *ggtt = &i915->ggtt;
> +	struct i915_ggtt *ggtt = &node_to_i915(m->private)->ggtt;
>  	int i, ret;
>  
> -	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
> +	ret = mutex_lock_interruptible(&ggtt->vm.mutex);
>  	if (ret)
>  		return ret;
>  
> @@ -935,7 +934,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
>  		seq_putc(m, '\n');
>  	}
>  
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	mutex_unlock(&ggtt->vm.mutex);
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 356c86071ccc..cbcba613b175 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2193,8 +2193,8 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	 * requirement that operations to the GGTT be made holding the RPM
>  	 * wakeref.
>  	 */
> -	lockdep_assert_held(&i915->drm.struct_mutex);
>  	intel_runtime_pm_get(i915);
> +	mutex_lock(&i915->ggtt.vm.mutex);
>  
>  	if (!obj->userfault_count)
>  		goto out;
> @@ -2211,6 +2211,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	wmb();
>  
>  out:
> +	mutex_unlock(&i915->ggtt.vm.mutex);
>  	intel_runtime_pm_put(i915);
>  }
>  
> @@ -2223,10 +2224,12 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
>  	/*
>  	 * Only called during RPM suspend. All users of the userfault_list
>  	 * must be holding an RPM wakeref to ensure that this can not
> -	 * run concurrently with themselves (and use the struct_mutex for
> +	 * run concurrently with themselves (and use the ggtt->mutex for
>  	 * protection between themselves).
>  	 */

I think the above change isn't correct, at least not yet at this stage:
All users of the userfault_list still use dev->struct_mutex, not vm.mutex.
I guess we could move that over to the ggtt.vm.mutex eventually, but this
patch doesn't do that.

>  
> +	mutex_lock(&i915->ggtt.vm.mutex);

I also don't think you need to take the lock just yet. For rpm we keep the
fences around (we restore them in rpm_resume after all), we just nuke the
mmap ptes.

> +
>  	list_for_each_entry_safe(obj, on,
>  				 &i915->mm.userfault_list, userfault_link)
>  		__i915_gem_object_release_mmap(obj);
> @@ -2255,6 +2258,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
>  		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
>  		reg->dirty = true;
>  	}
> +
> +	mutex_unlock(&i915->ggtt.vm.mutex);
>  }
>  
>  static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
> @@ -4861,7 +4866,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>  		mutex_unlock(&i915->drm.struct_mutex);
>  
>  		GEM_BUG_ON(obj->bind_count);
> -		GEM_BUG_ON(obj->userfault_count);
> +		GEM_BUG_ON(READ_ONCE(obj->userfault_count));

Another misplaced hunk?

>  		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
>  		GEM_BUG_ON(!list_empty(&obj->lut_list));
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3f0c612d42e7..e1d65b165bf1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -426,8 +426,11 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
>  {
>  	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
>  
> -	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
> +	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
> +		mutex_lock(&vma->vm->mutex);
>  		__i915_vma_unpin_fence(vma);
> +		mutex_unlock(&vma->vm->mutex);
> +	}
>  
>  	__i915_vma_unpin(vma);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index 60fa5a8276cb..9313a8e675c8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -188,6 +188,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
>  static void fence_write(struct drm_i915_fence_reg *fence,
>  			struct i915_vma *vma)
>  {
> +	lockdep_assert_held(&fence->ggtt->vm.mutex);
> +
>  	/* Previous access through the fence register is marshalled by
>  	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
>  	 * and explicitly managed for internal users.
> @@ -213,6 +215,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  	struct i915_ggtt *ggtt = fence->ggtt;
>  	int ret;
>  
> +	lockdep_assert_held(&ggtt->vm.mutex);
> +
>  	if (vma) {
>  		if (!i915_vma_is_map_and_fenceable(vma))
>  			return -EINVAL;
> @@ -289,14 +293,39 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  int i915_vma_put_fence(struct i915_vma *vma)
>  {
>  	struct drm_i915_fence_reg *fence = vma->fence;
> +	int err;
>  
>  	if (!fence)
>  		return 0;
>  
> -	if (fence->pin_count)
> -		return -EBUSY;
> +	mutex_lock(&vma->vm->mutex);
> +	if (!fence->pin_count)
> +		err = fence_update(fence, NULL);
> +	else
> +		err = -EBUSY;
> +	mutex_unlock(&vma->vm->mutex);
>  
> -	return fence_update(fence, NULL);
> +	return err;
> +}
> +
> +void i915_vma_revoke_fence(struct i915_vma *vma)
> +{
> +	struct drm_i915_fence_reg *fence;
> +
> +	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> +	lockdep_assert_held(&vma->vm->mutex);
> +
> +	fence = vma->fence;
> +	if (!fence)
> +		return;
> +
> +	GEM_BUG_ON(fence->pin_count);
> +
> +	list_move(&fence->link, &i915_vm_to_ggtt(vma->vm)->fence_list);
> +	vma->fence = NULL;
> +
> +	fence_write(fence, NULL);
> +	fence->vma = NULL;

fence_update(fence, NULL); here please instead of open-coding it. Or I'm
kinda not following why you're doing this?

>  }
>  
>  static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
> @@ -337,8 +366,7 @@ static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
>   *
>   * 0 on success, negative error code on failure.
>   */
> -int
> -i915_vma_pin_fence(struct i915_vma *vma)
> +int __i915_vma_pin_fence(struct i915_vma *vma)

I don't (yet) see a caller of this new __ version ...

>  {
>  	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
>  	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
> @@ -349,6 +377,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
>  	 * must keep the device awake whilst using the fence.
>  	 */
>  	assert_rpm_wakelock_held(ggtt->vm.i915);
> +	lockdep_assert_held(&ggtt->vm.mutex);
>  
>  	/* Just update our place in the LRU if our fence is getting reused. */
>  	if (vma->fence) {
> @@ -399,27 +428,34 @@ i915_reserve_fence(struct drm_i915_private *i915)
>  	int count;
>  	int ret;
>  
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> +	mutex_lock(&i915->ggtt.vm.mutex);
>  
>  	/* Keep at least one fence available for the display engine. */
>  	count = 0;
>  	list_for_each_entry(fence, &ggtt->fence_list, link)
>  		count += !fence->pin_count;
> -	if (count <= 1)
> -		return ERR_PTR(-ENOSPC);
> +	if (count <= 1) {
> +		fence = ERR_PTR(-ENOSPC);
> +		goto out_unlock;
> +	}
>  
>  	fence = fence_find(ggtt);
>  	if (IS_ERR(fence))
> -		return fence;
> +		goto out_unlock;
>  
>  	if (fence->vma) {
>  		/* Force-remove fence from VMA */
>  		ret = fence_update(fence, NULL);
> -		if (ret)
> -			return ERR_PTR(ret);
> +		if (ret) {
> +			fence = ERR_PTR(ret);
> +			goto out_unlock;
> +		}
>  	}
>  
>  	list_del(&fence->link);
> +
> +out_unlock:
> +	mutex_unlock(&i915->ggtt.vm.mutex);
>  	return fence;
>  }
>  
> @@ -431,9 +467,9 @@ i915_reserve_fence(struct drm_i915_private *i915)
>   */
>  void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
>  {
> -	lockdep_assert_held(&fence->ggtt->vm.i915->drm.struct_mutex);
> -
> +	mutex_lock(&fence->ggtt->vm.mutex);
>  	list_add(&fence->link, &fence->ggtt->fence_list);
> +	mutex_unlock(&fence->ggtt->vm.mutex);
>  }
>  
>  /**
> @@ -451,8 +487,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
>  	struct i915_ggtt *ggtt = &i915->ggtt;
>  	int i;
>  
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -
> +	mutex_lock(&ggtt->vm.mutex);
>  	for (i = 0; i < ggtt->num_fence_regs; i++) {
>  		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
>  
> @@ -461,6 +496,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
>  		if (fence->vma)
>  			i915_vma_revoke_mmap(fence->vma);
>  	}
> +	mutex_unlock(&ggtt->vm.mutex);
>  }
>  
>  /**
> @@ -476,6 +512,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
>  	struct i915_ggtt *ggtt = &i915->ggtt;
>  	int i;
>  
> +	mutex_lock(&ggtt->vm.mutex);
>  	for (i = 0; i < ggtt->num_fence_regs; i++) {
>  		struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
>  		struct i915_vma *vma = reg->vma;
> @@ -498,6 +535,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
>  		fence_write(reg, vma);
>  		reg->vma = vma;
>  	}
> +	mutex_unlock(&ggtt->vm.mutex);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index ed4e0fb558f7..045b75d79f60 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -860,7 +860,7 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
>  	struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
>  	u64 vma_offset;
>  
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> +	lockdep_assert_held(&vma->vm->mutex);
>  
>  	if (!i915_vma_has_userfault(vma))
>  		return;
> @@ -1082,6 +1082,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  		return 0;
>  
>  	if (i915_vma_is_map_and_fenceable(vma)) {
> +		mutex_lock(&vma->vm->mutex);
> +
>  		/*
>  		 * Check that we have flushed all writes through the GGTT
>  		 * before the unbind, other due to non-strict nature of those
> @@ -1091,16 +1093,14 @@ int i915_vma_unbind(struct i915_vma *vma)
>  		i915_vma_flush_writes(vma);
>  		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
>  
> -		/* release the fence reg _after_ flushing */
> -		ret = i915_vma_put_fence(vma);
> -		if (ret)
> -			return ret;
> -
>  		/* Force a pagefault for domain tracking on next user access */
>  		i915_vma_revoke_mmap(vma);
> +		i915_vma_revoke_fence(vma);
>  
>  		__i915_vma_iounmap(vma);
>  		vma->flags &= ~I915_VMA_CAN_FENCE;
> +
> +		mutex_unlock(&vma->vm->mutex);
>  	}
>  	GEM_BUG_ON(vma->fence);
>  	GEM_BUG_ON(i915_vma_has_userfault(vma));
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index f06d66377107..422d90c686b5 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -190,6 +190,7 @@ static inline bool i915_vma_set_userfault(struct i915_vma *vma)
>  
>  static inline void i915_vma_unset_userfault(struct i915_vma *vma)
>  {
> +	lockdep_assert_held(&vma->vm->mutex);
>  	return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>  }
>  
> @@ -378,11 +379,26 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
>   *
>   * True if the vma has a fence, false otherwise.
>   */
> -int i915_vma_pin_fence(struct i915_vma *vma);
> +int __i915_vma_pin_fence(struct i915_vma *vma);
> +static inline int i915_vma_pin_fence(struct i915_vma *vma)
> +{
> +	int err;
> +
> +	mutex_lock(&vma->vm->mutex);
> +	err = __i915_vma_pin_fence(vma);
> +	mutex_unlock(&vma->vm->mutex);
> +
> +	return err;
> +}
> +
>  int __must_check i915_vma_put_fence(struct i915_vma *vma);
> +void i915_vma_revoke_fence(struct i915_vma *vma);
>  
>  static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
>  {
> +	lockdep_assert_held(&vma->vm->mutex);
> +	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> +
>  	GEM_BUG_ON(vma->fence->pin_count <= 0);
>  	vma->fence->pin_count--;
>  }
> @@ -399,8 +415,11 @@ static inline void
>  i915_vma_unpin_fence(struct i915_vma *vma)
>  {
>  	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
> -	if (vma->fence)
> +	if (vma->fence) {

I think you need the lock here outside of vma->fence. At least I
understood the fancy new locking rules to mean that vma->fence is
protected by vma->vm.mutex. Since vma's can't move between vm this is
safe. Would be good to update the kerneldoc/comments for that accordingly
too.

> +		mutex_lock(&vma->vm->mutex);
>  		__i915_vma_unpin_fence(vma);
> +		mutex_unlock(&vma->vm->mutex);
> +	}
>  }
>  
>  void i915_vma_parked(struct drm_i915_private *i915);

Ok the locking changes in i915_gem_fence_reg.c look good, but git grep
says there's more:

- describe_obj in i915_debugfs.c only wants dev->struct_mutex, but really
  wants vm.mutex on top too now. At least for ggtt.

- i915_gem_fence_regs_info in i915_debugfs.c is already fixed in this
  patch.

- I've ignored i915_gpu_error.c as usual :-)

- intel_display.c is annoying, since the way it tries to figure out
  whether it got a fence or not is racy. That one probably needs the
  __i915_vma_pin_fence version that I didn't find ...

- intel_fbc.c looks safe due to the PLANE_HAS_FENCE flags (minus the
  issue in intel_display.c)

- I found one more access to vma->fence->id in the selftests, that's
  really the same as we do in i915_gpu_error.c.

If you feel like it, an i915_vma_fence_id() static inline that does the
correct READ_ONCE dance on vma->fence could be useful for the paranoid.
Absolutely loved the naming collision with dma_fence, and my grep fu isn't
good enough to avoid these. So probably missed some (but tried rather hard
not to).

Besides these small nits and questions looks all good.
-Daniel
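For reference, a minimal sketch of the i915_vma_fence_id() helper
suggested above. This is hypothetical and not part of the patch; it
assumes only the vma->fence pointer and the fence->id field already used
by the error-capture code:

	/* Hypothetical sketch: sample vma->fence exactly once so a
	 * concurrent revoke cannot flip it to NULL between the check
	 * and the dereference. Intended for opportunistic readers
	 * (error capture, selftests) that do not hold vm->mutex.
	 */
	static inline int i915_vma_fence_id(const struct i915_vma *vma)
	{
		struct drm_i915_fence_reg *fence = READ_ONCE(vma->fence);

		return fence ? fence->id : -1;
	}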
Quoting Daniel Vetter (2018-07-11 10:08:46)
> On Wed, Jul 11, 2018 at 08:36:04AM +0100, Chris Wilson wrote:
> > Introduce a new mutex to guard all of the vma operations within a vm (as
> > opposed to the BKL struct_mutex) and start by using it to guard the
> > fence operations for a GGTT VMA.
>
> The userfault_count here (and below) look like misplaced hunks?
>
> I think the above change isn't correct, at least not yet at this stage:
> All users of the userfault_list still use dev->struct_mutex, not vm.mutex.
> I guess we could move that over to the ggtt.vm.mutex eventually, but this
> patch doesn't do that.

It does, all those misplaced hunks are not so misplaced.
-Chris
Quoting Chris Wilson (2018-07-11 11:57:38)
> Quoting Daniel Vetter (2018-07-11 10:08:46)
> > I think the above change isn't correct, at least not yet at this stage:
> > All users of the userfault_list still use dev->struct_mutex, not vm.mutex.
> > I guess we could move that over to the ggtt.vm.mutex eventually, but this
> > patch doesn't do that.
>
> It does, all those misplaced hunks are not so misplaced.

Since we have differing opinions on whether this is or is not sufficiently
guarding GGTT vs rpm, what test are we missing in CI to conclusively
indicate whether or not this is broken? As it stands, CI is happy, and I
don't have many machines where rpm works (since it requires fw, sound
drivers, etc).
-Chris
On Wed, Jul 11, 2018 at 12:12:39PM +0100, Chris Wilson wrote:
> Quoting Chris Wilson (2018-07-11 11:57:38)
> > Quoting Daniel Vetter (2018-07-11 10:08:46)
> > > I think the above change isn't correct, at least not yet at this stage:
> > > All users of the userfault_list still use dev->struct_mutex, not vm.mutex.
> > > I guess we could move that over to the ggtt.vm.mutex eventually, but this
> > > patch doesn't do that.
> >
> > It does, all those misplaced hunks are not so misplaced.
>
> Since we have differing opinions on whether this is or is not
> sufficiently guarding GGTT vs rpm, what test are we missing in CI to
> conclusively indicate whether or not this is broken? As it stands, CI is
> happy, and I don't have many machines where rpm works (since it requires
> fw, sound drivers, etc).

So the "misplaced hunk" comment was something I typed before I fully read
through the patch and spotted the rework of the userfault list.

My understanding is still that the userfault_list is protected by rpm (see
some of the assert_rpm_wakelock_held right next to them), or maybe a
combination of rpm + dev->struct_mutex. Afaict it's definitely not
protected by the new vm.mutex, even after this patch. That's why I
complained about the comment change, and also why I didn't see why this
patch here needs the userfault_count changes.

Now could very well be that I'm missing something around userfault - I
don't remember the details, except that every time I try to reconstruct a
mental model for this I get it wrong for a few days. But if that's the
case then I think your patch isn't sufficient.

- Just for code organization reasons I think we should then move
  mm.userfault_list to the ggtt, like you've done with the fence stuff.
  Separate patch probably, like the fence prep.

- All the other places that touch userfault_* need to be audited/fixed
  too.

This is probably something we need/want to do, but I don't see the
relationship with fences. And assuming the current locking scheme is sound
(with its funky combination of dev->struct_mutex + rpm) I don't see why we
need to do anything in a patch that only moves the fence stuff over (and
not anything of the other ggtt mmap trickery) to vm.mutex.

Wrt CI coverage: Given that I think the current code without your changes
would be fine, even with fence reg tracking protected by vm.mutex, there's
nothing for CI to prove. I'm just saying you could drop all the
userfault_* related changes from this patch, and it should still all work.

tldr; I think userfault_* is safe as is and will stay safe even with
fences moved to other locks. You can keep the various hunks I complained
about for future patches which will move userfault_* over to
ggtt.vm.mutex. And oh dear do I not look forward to reviewing userfault_*
locking changes :-)

Cheers, Daniel
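To restate the locking rule being argued about in code form, a
hypothetical assertion; the function name is illustrative and does not
exist in the driver:

	/* Hypothetical: the claimed invariant for mm.userfault_list
	 * walkers. Runtime suspend is excluded by the rpm wakeref;
	 * mutual exclusion between the mmap paths still comes from
	 * dev->struct_mutex, not from the new vm.mutex.
	 */
	static void assert_userfault_list_protected(struct drm_i915_private *i915)
	{
		assert_rpm_wakelock_held(i915);
		lockdep_assert_held(&i915->drm.struct_mutex);
	}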
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 75ffed6a3f31..e2ba298a5d88 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -80,7 +80,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->userfault_count ? 'g' : ' ';
+	return READ_ONCE(obj->userfault_count) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
@@ -914,11 +914,10 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 
 static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *i915 = node_to_i915(m->private);
-	const struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = &node_to_i915(m->private)->ggtt;
 	int i, ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	ret = mutex_lock_interruptible(&ggtt->vm.mutex);
 	if (ret)
 		return ret;
 
@@ -935,7 +934,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 		seq_putc(m, '\n');
 	}
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	mutex_unlock(&ggtt->vm.mutex);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 356c86071ccc..cbcba613b175 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2193,8 +2193,8 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	 * requirement that operations to the GGTT be made holding the RPM
 	 * wakeref.
 	 */
-	lockdep_assert_held(&i915->drm.struct_mutex);
 	intel_runtime_pm_get(i915);
+	mutex_lock(&i915->ggtt.vm.mutex);
 
 	if (!obj->userfault_count)
 		goto out;
@@ -2211,6 +2211,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	wmb();
 
 out:
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	intel_runtime_pm_put(i915);
 }
 
@@ -2223,10 +2224,12 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	/*
 	 * Only called during RPM suspend. All users of the userfault_list
 	 * must be holding an RPM wakeref to ensure that this can not
-	 * run concurrently with themselves (and use the struct_mutex for
+	 * run concurrently with themselves (and use the ggtt->mutex for
	 * protection between themselves).
 	 */
 
+	mutex_lock(&i915->ggtt.vm.mutex);
+
 	list_for_each_entry_safe(obj, on,
 				 &i915->mm.userfault_list, userfault_link)
 		__i915_gem_object_release_mmap(obj);
@@ -2255,6 +2258,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
 		reg->dirty = true;
 	}
+
+	mutex_unlock(&i915->ggtt.vm.mutex);
 }
 
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -4861,7 +4866,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		mutex_unlock(&i915->drm.struct_mutex);
 
 		GEM_BUG_ON(obj->bind_count);
-		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(READ_ONCE(obj->userfault_count));
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
 		GEM_BUG_ON(!list_empty(&obj->lut_list));
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3f0c612d42e7..e1d65b165bf1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -426,8 +426,11 @@ static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
 	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE)) {
+		mutex_lock(&vma->vm->mutex);
 		__i915_vma_unpin_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
+	}
 
 	__i915_vma_unpin(vma);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 60fa5a8276cb..9313a8e675c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -188,6 +188,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 static void fence_write(struct drm_i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
+	lockdep_assert_held(&fence->ggtt->vm.mutex);
+
 	/* Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
@@ -213,6 +215,8 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	struct i915_ggtt *ggtt = fence->ggtt;
 	int ret;
 
+	lockdep_assert_held(&ggtt->vm.mutex);
+
 	if (vma) {
 		if (!i915_vma_is_map_and_fenceable(vma))
 			return -EINVAL;
@@ -289,14 +293,39 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 int i915_vma_put_fence(struct i915_vma *vma)
 {
 	struct drm_i915_fence_reg *fence = vma->fence;
+	int err;
 
 	if (!fence)
 		return 0;
 
-	if (fence->pin_count)
-		return -EBUSY;
+	mutex_lock(&vma->vm->mutex);
+	if (!fence->pin_count)
+		err = fence_update(fence, NULL);
+	else
+		err = -EBUSY;
+	mutex_unlock(&vma->vm->mutex);
 
-	return fence_update(fence, NULL);
+	return err;
+}
+
+void i915_vma_revoke_fence(struct i915_vma *vma)
+{
+	struct drm_i915_fence_reg *fence;
+
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+	lockdep_assert_held(&vma->vm->mutex);
+
+	fence = vma->fence;
+	if (!fence)
+		return;
+
+	GEM_BUG_ON(fence->pin_count);
+
+	list_move(&fence->link, &i915_vm_to_ggtt(vma->vm)->fence_list);
+	vma->fence = NULL;
+
+	fence_write(fence, NULL);
+	fence->vma = NULL;
 }
 
 static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
@@ -337,8 +366,7 @@ static struct drm_i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
  *
  * 0 on success, negative error code on failure.
  */
-int
-i915_vma_pin_fence(struct i915_vma *vma)
+int __i915_vma_pin_fence(struct i915_vma *vma)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
@@ -349,6 +377,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
 	 * must keep the device awake whilst using the fence.
 	 */
 	assert_rpm_wakelock_held(ggtt->vm.i915);
+	lockdep_assert_held(&ggtt->vm.mutex);
 
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
@@ -399,27 +428,34 @@ i915_reserve_fence(struct drm_i915_private *i915)
 	int count;
 	int ret;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
+	mutex_lock(&i915->ggtt.vm.mutex);
 
 	/* Keep at least one fence available for the display engine. */
 	count = 0;
 	list_for_each_entry(fence, &ggtt->fence_list, link)
 		count += !fence->pin_count;
-	if (count <= 1)
-		return ERR_PTR(-ENOSPC);
+	if (count <= 1) {
+		fence = ERR_PTR(-ENOSPC);
+		goto out_unlock;
+	}
 
 	fence = fence_find(ggtt);
 	if (IS_ERR(fence))
-		return fence;
+		goto out_unlock;
 
 	if (fence->vma) {
 		/* Force-remove fence from VMA */
 		ret = fence_update(fence, NULL);
-		if (ret)
-			return ERR_PTR(ret);
+		if (ret) {
+			fence = ERR_PTR(ret);
+			goto out_unlock;
+		}
 	}
 
 	list_del(&fence->link);
+
+out_unlock:
+	mutex_unlock(&i915->ggtt.vm.mutex);
 	return fence;
 }
 
@@ -431,9 +467,9 @@ i915_reserve_fence(struct drm_i915_private *i915)
  */
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 {
-	lockdep_assert_held(&fence->ggtt->vm.i915->drm.struct_mutex);
-
+	mutex_lock(&fence->ggtt->vm.mutex);
 	list_add(&fence->link, &fence->ggtt->fence_list);
+	mutex_unlock(&fence->ggtt->vm.mutex);
 }
 
 /**
@@ -451,8 +487,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
+	mutex_lock(&ggtt->vm.mutex);
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
 
@@ -461,6 +496,7 @@ void i915_gem_revoke_fences(struct drm_i915_private *i915)
 		if (fence->vma)
 			i915_vma_revoke_mmap(fence->vma);
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 /**
@@ -476,6 +512,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	int i;
 
+	mutex_lock(&ggtt->vm.mutex);
 	for (i = 0; i < ggtt->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &ggtt->fence_regs[i];
 		struct i915_vma *vma = reg->vma;
@@ -498,6 +535,7 @@ void i915_gem_restore_fences(struct drm_i915_private *i915)
 		fence_write(reg, vma);
 		reg->vma = vma;
 	}
+	mutex_unlock(&ggtt->vm.mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ed4e0fb558f7..045b75d79f60 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -860,7 +860,7 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 	struct drm_vma_offset_node *node = &vma->obj->base.vma_node;
 	u64 vma_offset;
 
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	lockdep_assert_held(&vma->vm->mutex);
 
 	if (!i915_vma_has_userfault(vma))
 		return;
@@ -1082,6 +1082,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 		return 0;
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
+		mutex_lock(&vma->vm->mutex);
+
 		/*
 		 * Check that we have flushed all writes through the GGTT
 		 * before the unbind, other due to non-strict nature of those
@@ -1091,16 +1093,14 @@ int i915_vma_unbind(struct i915_vma *vma)
 		i915_vma_flush_writes(vma);
 		GEM_BUG_ON(i915_vma_has_ggtt_write(vma));
 
-		/* release the fence reg _after_ flushing */
-		ret = i915_vma_put_fence(vma);
-		if (ret)
-			return ret;
-
 		/* Force a pagefault for domain tracking on next user access */
 		i915_vma_revoke_mmap(vma);
+		i915_vma_revoke_fence(vma);
 
 		__i915_vma_iounmap(vma);
 		vma->flags &= ~I915_VMA_CAN_FENCE;
+
+		mutex_unlock(&vma->vm->mutex);
 	}
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index f06d66377107..422d90c686b5 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -190,6 +190,7 @@ static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 
 static inline void i915_vma_unset_userfault(struct i915_vma *vma)
 {
+	lockdep_assert_held(&vma->vm->mutex);
 	return __clear_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
@@ -378,11 +379,26 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma)
  *
  * True if the vma has a fence, false otherwise.
  */
-int i915_vma_pin_fence(struct i915_vma *vma);
+int __i915_vma_pin_fence(struct i915_vma *vma);
+static inline int i915_vma_pin_fence(struct i915_vma *vma)
+{
+	int err;
+
+	mutex_lock(&vma->vm->mutex);
+	err = __i915_vma_pin_fence(vma);
+	mutex_unlock(&vma->vm->mutex);
+
+	return err;
+}
+
 int __must_check i915_vma_put_fence(struct i915_vma *vma);
+void i915_vma_revoke_fence(struct i915_vma *vma);
 
 static inline void __i915_vma_unpin_fence(struct i915_vma *vma)
 {
+	lockdep_assert_held(&vma->vm->mutex);
+	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+
 	GEM_BUG_ON(vma->fence->pin_count <= 0);
 	vma->fence->pin_count--;
 }
@@ -399,8 +415,11 @@ static inline void
 i915_vma_unpin_fence(struct i915_vma *vma)
 {
 	/* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */
-	if (vma->fence)
+	if (vma->fence) {
+		mutex_lock(&vma->vm->mutex);
 		__i915_vma_unpin_fence(vma);
+		mutex_unlock(&vma->vm->mutex);
+	}
 }
 
 void i915_vma_parked(struct drm_i915_private *i915);
Introduce a new mutex to guard all of the vma operations within a vm (as
opposed to the BKL struct_mutex) and start by using it to guard the
fence operations for a GGTT VMA.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  9 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 11 +++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  5 +-
 drivers/gpu/drm/i915/i915_gem_fence_reg.c  | 68 +++++++++++++++++-----
 drivers/gpu/drm/i915/i915_vma.c            | 12 ++--
 drivers/gpu/drm/i915/i915_vma.h            | 23 +++++++-
 6 files changed, 96 insertions(+), 32 deletions(-)
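With this patch the fence locking is pushed into the helpers themselves,
so a caller needs no explicit vm.mutex handling. A hypothetical usage
sketch (example_use_fence is illustrative, not a function in the driver):

	/* i915_vma_pin_fence()/i915_vma_unpin_fence() take
	 * vma->vm->mutex internally; the caller only needs an rpm
	 * wakeref (asserted inside __i915_vma_pin_fence) and a pinned,
	 * map-and-fenceable GGTT vma.
	 */
	static int example_use_fence(struct i915_vma *vma)
	{
		int err;

		err = i915_vma_pin_fence(vma);
		if (err)
			return err;

		/* ... access through the fenced GGTT aperture ... */

		i915_vma_unpin_fence(vma);
		return 0;
	}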