Message ID | 20211118130230.154988-2-thomas.hellstrom@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Series | drm/i915/ttm: Async migration |
On 18/11/2021 13:02, Thomas Hellström wrote:
> From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>
> For now, we will only allow async migration when TTM is used,
> so the paths we care about are related to TTM.
>
> The mmap path is handled by having the fence in ttm_bo->moving,
> when pinning, the binding only becomes available after the moving
> fence is signaled, and pinning a cpu map will only work after
> the moving fence signals.
>
> This should close all holes where userspace can read a buffer
> before it's fully migrated.
>
> v2:
> - Fix a couple of SPARSE warnings
> v3:
> - Fix a NULL pointer dereference
> v4:
> - Ditch the moving fence waiting for i915_vma_pin_iomap() and
>   replace with a verification that the vma is already bound.
>   (Matthew Auld)
> - Squash with a previous patch introducing moving fence waiting and
>   accessing interfaces (Matthew Auld)
> - Rename to indicated that we also add support for sync waiting.
>
> Co-developed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c | 52 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/gem/i915_gem_object.h |  6 +++
>  drivers/gpu/drm/i915/gem/i915_gem_pages.c  |  6 +++
>  drivers/gpu/drm/i915/i915_vma.c            | 36 ++++++++++++++-
>  4 files changed, 99 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 591ee3cb7275..7b7d9415c9cb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -33,6 +33,7 @@
>  #include "i915_gem_object.h"
>  #include "i915_memcpy.h"
>  #include "i915_trace.h"
> +#include "i915_gem_ttm.h"

Nit: Ordering.

>
>  static struct kmem_cache *slab_objects;
>
> @@ -726,6 +727,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
>  	.export = i915_gem_prime_export,
>  };
>
> +/**
> + * i915_gem_object_get_moving_fence - Get the object's moving fence if any
> + * @obj: The object whose moving fence to get.
> + *
> + * A non-signaled moving fence means that there is an async operation
> + * pending on the object that needs to be waited on before setting up
> + * any GPU- or CPU PTEs to the object's pages.
> + *
> + * Return: A refcounted pointer to the object's moving fence if any,
> + * NULL otherwise.
> + */
> +struct dma_fence *
> +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
> +{
> +	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
> +}
> +
> +/**
> + * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
> + * @obj: The object whose moving fence to wait for.
> + * @intr: Whether to wait interruptible.
> + *
> + * If the moving fence signaled without an error, it is detached from the
> + * object and put.
> + *
> + * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
> + * negative error code if the async operation represented by the
> + * moving fence failed.
> + */
> +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
> +				      bool intr)
> +{
> +	struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
> +	int ret;
> +
> +	assert_object_held(obj);
> +	if (!fence)
> +		return 0;
> +
> +	ret = dma_fence_wait(fence, intr);
> +	if (ret)
> +		return ret;
> +
> +	if (fence->error)
> +		return fence->error;
> +
> +	i915_gem_to_ttm(obj)->moving = NULL;
> +	dma_fence_put(fence);
> +	return 0;
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftests/huge_gem_object.c"
>  #include "selftests/huge_pages.c"
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 133963b46135..66f20b803b01 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -517,6 +517,12 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
>  	i915_gem_object_unpin_pages(obj);
>  }
>
> +struct dma_fence *
> +i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
> +
> +int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
> +				      bool intr);
> +
>  void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
>  					 unsigned int cache_level);
>  bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> index c4f684b7cc51..49c6e55c68ce 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
> @@ -418,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>  	}
>
>  	if (!ptr) {
> +		err = i915_gem_object_wait_moving_fence(obj, true);
> +		if (err) {
> +			ptr = ERR_PTR(err);
> +			goto err_unpin;
> +		}
> +
>  		if (GEM_WARN_ON(type == I915_MAP_WC &&
>  				!static_cpu_has(X86_FEATURE_PAT)))
>  			ptr = ERR_PTR(-ENODEV);
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 0896656896d0..00d3daf72329 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -354,6 +354,25 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
>  	return err;
>  }
>
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
> +static int i915_vma_verify_bind_complete(struct i915_vma *vma)
> +{
> +	int err = 0;
> +
> +	if (i915_active_has_exclusive(&vma->active)) {
> +		struct dma_fence *fence =
> +			i915_active_fence_get(&vma->active.excl);

if (!fence)
	return 0;

?

> +
> +		if (dma_fence_is_signaled(fence))
> +			err = fence->error;
> +		else
> +			err = -EBUSY;

dma_fence_put(fence); ?

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

> +	}
> +
> +	return err;
> +}
> +#endif
> +
>  /**
>   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>   * @vma: VMA to map
> @@ -428,6 +447,13 @@ int i915_vma_bind(struct i915_vma *vma,
>  			work->pinned = i915_gem_object_get(vma->obj);
>  		}
>  	} else {
> +		if (vma->obj) {
> +			int ret;
> +
> +			ret = i915_gem_object_wait_moving_fence(vma->obj, true);
> +			if (ret)
> +				return ret;
> +		}
>  		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
>  	}
>
> @@ -449,6 +475,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>
>  	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
>  	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
> +	GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
>
>  	ptr = READ_ONCE(vma->iomap);
>  	if (ptr == NULL) {
> @@ -867,6 +894,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  		    u64 size, u64 alignment, u64 flags)
>  {
>  	struct i915_vma_work *work = NULL;
> +	struct dma_fence *moving = NULL;
>  	intel_wakeref_t wakeref = 0;
>  	unsigned int bound;
>  	int err;
> @@ -892,7 +920,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	if (flags & PIN_GLOBAL)
>  		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
>
> -	if (flags & vma->vm->bind_async_flags) {
> +	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
> +	if (flags & vma->vm->bind_async_flags || moving) {
>  		/* lock VM */
>  		err = i915_vm_lock_objects(vma->vm, ww);
>  		if (err)
> @@ -906,6 +935,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>
>  		work->vm = i915_vm_get(vma->vm);
>
> +		dma_fence_work_chain(&work->base, moving);
> +
>  		/* Allocate enough page directories to used PTE */
>  		if (vma->vm->allocate_va_range) {
>  			err = i915_vm_alloc_pt_stash(vma->vm,
> @@ -1010,7 +1041,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  err_rpm:
>  	if (wakeref)
>  		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
> +	if (moving)
> +		dma_fence_put(moving);
>  	vma_put_pages(vma);
> +
>  	return err;
>  }
>
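[Editorial note] For reference, a minimal sketch of i915_vma_verify_bind_complete() with both of the review suggestions above folded in: bail out early if i915_active_fence_get() returns no fence, and drop the reference it took once the check is done. This illustrates the suggested shape only, not the code that was eventually committed:

static int i915_vma_verify_bind_complete(struct i915_vma *vma)
{
	int err = 0;

	if (i915_active_has_exclusive(&vma->active)) {
		struct dma_fence *fence =
			i915_active_fence_get(&vma->active.excl);

		/* Suggested: the exclusive fence may already be gone. */
		if (!fence)
			return 0;

		if (dma_fence_is_signaled(fence))
			err = fence->error;
		else
			err = -EBUSY;

		/* Suggested: drop the reference taken by i915_active_fence_get(). */
		dma_fence_put(fence);
	}

	return err;
}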
Hi "Thomas,

I love your patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]
[also build test ERROR on drm-exynos/exynos-drm-next drm/drm-next v5.16-rc1 next-20211118]
[cannot apply to drm-intel/for-linux-next tegra-drm/drm/tegra/for-next airlied/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Thomas-Hellstr-m/drm-i915-ttm-Async-migration/20211118-210436
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: x86_64-randconfig-r001-20211118 (attached as .config)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/316dda0c6b91d043111cb348b43be6a6f2e3bb0a
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-i915-ttm-Async-migration/20211118-210436
        git checkout 316dda0c6b91d043111cb348b43be6a6f2e3bb0a
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=x86_64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/gpu/drm/i915/i915_vma.c:478:13: error: implicit declaration of function 'i915_vma_verify_bind_complete' [-Werror,-Wimplicit-function-declaration]
           GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
                      ^
   1 error generated.


vim +/i915_vma_verify_bind_complete +478 drivers/gpu/drm/i915/i915_vma.c

   463	
   464	void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
   465	{
   466		void __iomem *ptr;
   467		int err;
   468	
   469		if (!i915_gem_object_is_lmem(vma->obj)) {
   470			if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
   471				err = -ENODEV;
   472				goto err;
   473			}
   474		}
   475	
   476		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
   477		GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
 > 478		GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
   479	
   480		ptr = READ_ONCE(vma->iomap);
   481		if (ptr == NULL) {
   482			/*
   483			 * TODO: consider just using i915_gem_object_pin_map() for lmem
   484			 * instead, which already supports mapping non-contiguous chunks
   485			 * of pages, that way we can also drop the
   486			 * I915_BO_ALLOC_CONTIGUOUS when allocating the object.
   487			 */
   488			if (i915_gem_object_is_lmem(vma->obj))
   489				ptr = i915_gem_object_lmem_io_map(vma->obj, 0,
   490								  vma->obj->base.size);
   491			else
   492				ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
   493							vma->node.start,
   494							vma->node.size);
   495			if (ptr == NULL) {
   496				err = -ENOMEM;
   497				goto err;
   498			}
   499	
   500			if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
   501				io_mapping_unmap(ptr);
   502				ptr = vma->iomap;
   503			}
   504		}
   505	
   506		__i915_vma_pin(vma);
   507	
   508		err = i915_vma_pin_fence(vma);
   509		if (err)
   510			goto err_unpin;
   511	
   512		i915_vma_set_ggtt_write(vma);
   513	
   514		/* NB Access through the GTT requires the device to be awake. */
   515		return ptr;
   516	
   517	err_unpin:
   518		__i915_vma_unpin(vma);
   519	err:
   520		return IO_ERR_PTR(err);
   521	}
   522	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
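[Editorial note] The robot's error arises because the new helper is compiled only under CONFIG_DRM_I915_DEBUG_GEM, while the GEM_BUG_ON() call site in i915_vma_pin_iomap() is built unconditionally, so a non-debug randconfig sees no declaration. Below is a sketch of one possible resolution (an assumption on our part, not necessarily the fix applied in a later revision): keep the real check for debug builds and give non-debug builds a no-op definition.

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
static int i915_vma_verify_bind_complete(struct i915_vma *vma)
{
	struct dma_fence *fence = i915_active_fence_get(&vma->active.excl);
	int err;

	if (!fence)
		return 0;

	/* Report the error of a signaled fence, or -EBUSY if still pending. */
	err = dma_fence_is_signaled(fence) ? fence->error : -EBUSY;
	dma_fence_put(fence);

	return err;
}
#else
/* Non-debug builds still need *some* definition for the GEM_BUG_ON() site. */
#define i915_vma_verify_bind_complete(_vma) 0
#endif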
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 591ee3cb7275..7b7d9415c9cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -33,6 +33,7 @@
 #include "i915_gem_object.h"
 #include "i915_memcpy.h"
 #include "i915_trace.h"
+#include "i915_gem_ttm.h"
 
 static struct kmem_cache *slab_objects;
 
@@ -726,6 +727,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
 	.export = i915_gem_prime_export,
 };
 
+/**
+ * i915_gem_object_get_moving_fence - Get the object's moving fence if any
+ * @obj: The object whose moving fence to get.
+ *
+ * A non-signaled moving fence means that there is an async operation
+ * pending on the object that needs to be waited on before setting up
+ * any GPU- or CPU PTEs to the object's pages.
+ *
+ * Return: A refcounted pointer to the object's moving fence if any,
+ * NULL otherwise.
+ */
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
+{
+	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
+}
+
+/**
+ * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
+ * @obj: The object whose moving fence to wait for.
+ * @intr: Whether to wait interruptible.
+ *
+ * If the moving fence signaled without an error, it is detached from the
+ * object and put.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
+ * negative error code if the async operation represented by the
+ * moving fence failed.
+ */
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+				      bool intr)
+{
+	struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
+	int ret;
+
+	assert_object_held(obj);
+	if (!fence)
+		return 0;
+
+	ret = dma_fence_wait(fence, intr);
+	if (ret)
+		return ret;
+
+	if (fence->error)
+		return fence->error;
+
+	i915_gem_to_ttm(obj)->moving = NULL;
+	dma_fence_put(fence);
+	return 0;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/huge_gem_object.c"
 #include "selftests/huge_pages.c"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 133963b46135..66f20b803b01 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -517,6 +517,12 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
+struct dma_fence *
+i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+				      bool intr);
+
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index c4f684b7cc51..49c6e55c68ce 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -418,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 
 	if (!ptr) {
+		err = i915_gem_object_wait_moving_fence(obj, true);
+		if (err) {
+			ptr = ERR_PTR(err);
+			goto err_unpin;
+		}
+
 		if (GEM_WARN_ON(type == I915_MAP_WC &&
 				!static_cpu_has(X86_FEATURE_PAT)))
 			ptr = ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 0896656896d0..00d3daf72329 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -354,6 +354,25 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
 	return err;
 }
 
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+static int i915_vma_verify_bind_complete(struct i915_vma *vma)
+{
+	int err = 0;
+
+	if (i915_active_has_exclusive(&vma->active)) {
+		struct dma_fence *fence =
+			i915_active_fence_get(&vma->active.excl);
+
+		if (dma_fence_is_signaled(fence))
+			err = fence->error;
+		else
+			err = -EBUSY;
+	}
+
+	return err;
+}
+#endif
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -428,6 +447,13 @@ int i915_vma_bind(struct i915_vma *vma,
 			work->pinned = i915_gem_object_get(vma->obj);
 		}
 	} else {
+		if (vma->obj) {
+			int ret;
+
+			ret = i915_gem_object_wait_moving_fence(vma->obj, true);
+			if (ret)
+				return ret;
+		}
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
 	}
 
@@ -449,6 +475,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
+	GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
 
 	ptr = READ_ONCE(vma->iomap);
 	if (ptr == NULL) {
@@ -867,6 +894,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		    u64 size, u64 alignment, u64 flags)
 {
 	struct i915_vma_work *work = NULL;
+	struct dma_fence *moving = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
@@ -892,7 +920,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	if (flags & vma->vm->bind_async_flags) {
+	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
+	if (flags & vma->vm->bind_async_flags || moving) {
 		/* lock VM */
 		err = i915_vm_lock_objects(vma->vm, ww);
 		if (err)
@@ -906,6 +935,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 		work->vm = i915_vm_get(vma->vm);
 
+		dma_fence_work_chain(&work->base, moving);
+
 		/* Allocate enough page directories to used PTE */
 		if (vma->vm->allocate_va_range) {
 			err = i915_vm_alloc_pt_stash(vma->vm,
@@ -1010,7 +1041,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 err_rpm:
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+	if (moving)
+		dma_fence_put(moving);
 	vma_put_pages(vma);
+
 	return err;
 }
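[Editorial note] To make the calling convention of the new interface concrete, here is a hedged usage sketch. The caller example_prepare_object() and its work parameter are hypothetical and not part of the patch; the pattern it shows mirrors what the patch does: hold the object lock, then either wait for the moving fence synchronously (as i915_vma_bind() and i915_gem_object_pin_map() do) or chain asynchronous work after it (as i915_vma_pin_ww() does).

static int example_prepare_object(struct drm_i915_gem_object *obj,
				  struct dma_fence_work *work)
{
	struct dma_fence *moving;

	assert_object_held(obj);

	if (!work)
		/* Synchronous users (CPU maps, immediate binds) just wait. */
		return i915_gem_object_wait_moving_fence(obj, true);

	/* Async users order their work after the pending migration. */
	moving = i915_gem_object_get_moving_fence(obj);
	if (moving) {
		dma_fence_work_chain(work, moving);
		dma_fence_put(moving);
	}

	return 0;
}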