diff mbox series

[RFC/HACK] drm/ttm: avoid multihop moves in drivers.

Message ID 20201020041606.1701145-2-airlied@gmail.com (mailing list archive)
State New, archived
Headers show
Series [RFC/HACK] drm/ttm: avoid multihop moves in drivers. | expand

Commit Message

Dave Airlie Oct. 20, 2020, 4:16 a.m. UTC
From: Dave Airlie <airlied@redhat.com>

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 128 +++---------------------
 drivers/gpu/drm/nouveau/nouveau_bo.c    | 106 ++------------------
 drivers/gpu/drm/radeon/radeon_ttm.c     | 110 ++------------------
 drivers/gpu/drm/ttm/ttm_bo.c            |  44 +++++++-
 4 files changed, 77 insertions(+), 311 deletions(-)

Comments

Christian König Oct. 20, 2020, 8:30 a.m. UTC | #1
Yes, please! That approach looks even better than what I had in mind.

A few comments below:

Am 20.10.20 um 06:16 schrieb Dave Airlie:
> From: Dave Airlie <airlied@redhat.com>
>
> [SNIP]
> +	/* don't go from system->vram in one hop,
> +	   report this back to the caller tell it
> +	   where to bounce this buffer through. */
> +
> +	if ((old_mem->mem_type == TTM_PL_SYSTEM &&
> +	     new_mem->mem_type == TTM_PL_VRAM) ||
> +	    (old_mem->mem_type == TTM_PL_VRAM &&
> +	     new_mem->mem_type == TTM_PL_SYSTEM)) {
> +		new_mem->mem_type = TTM_PL_TT;

Not sure if that is such a good idea, new_mem can be some allocated 
memory in the VRAM domain at this moment.

Maybe instead give the callback a separate bounce argument instead.

> +		new_mem->placement = 0;
> +		return -EMULTIHOP;

Using EMULTIHOP here is a really nice idea.

> [SNIP]
>
> +static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
> +				     struct ttm_resource *mem,
> +				     struct ttm_operation_ctx *ctx)
> +{
> +	struct ttm_place placement_memtype = {
> +		.fpfn = 0,
> +		.lpfn = 0,
> +		.mem_type = mem->mem_type,
> +		.flags = mem->placement,
> +	};
> +	struct ttm_placement bounce_placement;
> +	int ret;
> +
> +	bounce_placement.num_placement = bounce_placement.num_busy_placement = 1;
> +	bounce_placement.placement = bounce_placement.busy_placement = &placement_memtype;
> +
> +	/* find space in the bounce domain */
> +	ret = ttm_bo_mem_space(bo, &bounce_placement, mem, ctx);
> +	if (ret)
> +		return ret;
> +	/* move to the bounce domain */
> +	ret = ttm_bo_handle_move_mem(bo, mem, false, ctx);

Is this a recursion? I don't think it is, but I thought I better double 
check.

Regards,
Christian.


> +	if (ret)
> +		return ret;
> +	return 0;
> +}
> +
>   static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>   			      struct ttm_placement *placement,
>   			      struct ttm_operation_ctx *ctx)
> @@ -954,11 +984,18 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
>   	/*
>   	 * Determine where to move the buffer.
>   	 */
> +bounce:
>   	ret = ttm_bo_mem_space(bo, placement, &mem, ctx);
>   	if (ret)
> -		goto out_unlock;
> +		return ret;
>   	ret = ttm_bo_handle_move_mem(bo, &mem, false, ctx);
> -out_unlock:
> +	if (ret == -EMULTIHOP) {
> +		ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx);
> +		if (ret)
> +			return ret;
> +		/* try and move to final place now. */
> +		goto bounce;
> +	}
>   	if (ret)
>   		ttm_resource_free(bo, &mem);
>   	return ret;
> @@ -1478,4 +1515,3 @@ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
>   	ttm_tt_destroy(bo->bdev, bo->ttm);
>   	bo->ttm = NULL;
>   }
> -
Dave Airlie Oct. 21, 2020, 4:42 a.m. UTC | #2
On Tue, 20 Oct 2020 at 18:31, Christian König <christian.koenig@amd.com> wrote:
>
> Yes, please! That approach looks even better than what I had in mind.
>
> A few comments below:
>
> Am 20.10.20 um 06:16 schrieb Dave Airlie:
> > From: Dave Airlie <airlied@redhat.com>
> >
> > [SNIP]
> > +     /* don't go from system->vram in one hop,
> > +        report this back to the caller tell it
> > +        where to bounce this buffer through. */
> > +
> > +     if ((old_mem->mem_type == TTM_PL_SYSTEM &&
> > +          new_mem->mem_type == TTM_PL_VRAM) ||
> > +         (old_mem->mem_type == TTM_PL_VRAM &&
> > +          new_mem->mem_type == TTM_PL_SYSTEM)) {
> > +             new_mem->mem_type = TTM_PL_TT;
>
> Not sure if that is such a good idea, new_mem can be some allocated
> memory in the VRAM domain at this moment.
>
> Maybe instead give the callback a separate bounce argument instead.

I've changed this in the latest posting to take a ttm_place that the
driver fills out
if it needs a hop.

> > +     /* find space in the bounce domain */
> > +     ret = ttm_bo_mem_space(bo, &bounce_placement, mem, ctx);
> > +     if (ret)
> > +             return ret;
> > +     /* move to the bounce domain */
> > +     ret = ttm_bo_handle_move_mem(bo, mem, false, ctx);
>
> Is this a recursion? I don't think it is, but I thought I better double
> check.

No it should never recurse because the driver should only return
-EMULTIHOP once,
even if a theoretical driver was to need > 1 hops it should work until
it returns something
other than -EMULTIHOP. Of course a broken driver could cause bad
things, but that is
always true.

Dave.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 872795affe87..51d55e3fd8c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -515,109 +515,6 @@  static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	return r;
 }
 
-static int amdgpu_move_setup_tt(struct ttm_buffer_object *bo,
-				struct ttm_operation_ctx *ctx,
-				struct ttm_resource *tmp_mem)
-{
-	struct ttm_place placements;
-	struct ttm_placement placement;
-	int r;
-
-	placement.num_placement = 1;
-	placement.placement = &placements;
-	placement.num_busy_placement = 1;
-	placement.busy_placement = &placements;
-	placements.fpfn = 0;
-	placements.lpfn = 0;
-	placements.mem_type = TTM_PL_TT;
-	placements.flags = 0;
-	r = ttm_bo_mem_space(bo, &placement, tmp_mem, ctx);
-	if (unlikely(r)) {
-		pr_err("Failed to find GTT space for blit from VRAM\n");
-		return r;
-	}
-
-	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
-	if (unlikely(r))
-		goto out_cleanup;
-
-	/* Bind the memory to the GTT space */
-	r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, tmp_mem);
-	if (unlikely(r)) {
-		goto out_cleanup;
-	}
-	return 0;
-out_cleanup:
-	ttm_resource_free(bo, tmp_mem);
-	return r;
-}
-/**
- * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
- *
- * Called by amdgpu_bo_move().
- */
-static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
-				struct ttm_operation_ctx *ctx,
-				struct ttm_resource *new_mem)
-{
-	struct ttm_resource *old_mem = &bo->mem;
-	struct ttm_resource tmp_mem;
-	int r;
-
-	/* create space/pages for new_mem in GTT space */
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-
-	r = amdgpu_move_setup_tt(bo, ctx, &tmp_mem);
-	if (unlikely(r))
-		return r;
-
-	/* blit VRAM to GTT */
-	r = amdgpu_move_blit(bo, evict, &tmp_mem, old_mem);
-	if (unlikely(r)) {
-		goto out_cleanup;
-	}
-
-	r = ttm_bo_wait_ctx(bo, ctx);
-	if (unlikely(r))
-		goto out_cleanup;
-
-	amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
-	ttm_resource_free(bo, &bo->mem);
-	ttm_bo_assign_mem(bo, new_mem);
-out_cleanup:
-	ttm_resource_free(bo, &tmp_mem);
-	return r;
-}
-
-/**
- * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
- *
- * Called by amdgpu_bo_move().
- */
-static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
-				struct ttm_operation_ctx *ctx,
-				struct ttm_resource *new_mem)
-{
-	struct ttm_resource *old_mem = &bo->mem;
-	struct ttm_resource tmp_mem;
-	int r;
-
-	/* make space in GTT for old_mem buffer */
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-
-	r = amdgpu_move_setup_tt(bo, ctx, &tmp_mem);
-	if (unlikely(r))
-		return r;
-
-	ttm_bo_assign_mem(bo, &tmp_mem);
-	/* copy to VRAM */
-	r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
-	ttm_resource_free(bo, &tmp_mem);
-	return r;
-}
-
 /**
  * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
  *
@@ -656,6 +553,19 @@  static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = &bo->mem;
 	int r;
 
+	/* don't go from system->vram in one hop,
+	   report this back to the caller tell it
+	   where to bounce this buffer through. */
+
+	if ((old_mem->mem_type == TTM_PL_SYSTEM &&
+	     new_mem->mem_type == TTM_PL_VRAM) ||
+	    (old_mem->mem_type == TTM_PL_VRAM &&
+	     new_mem->mem_type == TTM_PL_SYSTEM)) {
+		new_mem->mem_type = TTM_PL_TT;
+		new_mem->placement = 0;
+		return -EMULTIHOP;
+	}
+
 	if (new_mem->mem_type == TTM_PL_TT) {
 		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
 		if (r)
@@ -709,16 +619,8 @@  static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		goto memcpy;
 	}
 
-	if (old_mem->mem_type == TTM_PL_VRAM &&
-	    new_mem->mem_type == TTM_PL_SYSTEM) {
-		r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem);
-	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
-		   new_mem->mem_type == TTM_PL_VRAM) {
-		r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem);
-	} else {
-		r = amdgpu_move_blit(bo, evict,
-				     new_mem, old_mem);
-	}
+	r = amdgpu_move_blit(bo, evict,
+			     new_mem, old_mem);
 
 	if (r) {
 memcpy:
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 038974d9a0d6..c08b3da804fb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -862,92 +862,6 @@  nouveau_bo_move_init(struct nouveau_drm *drm)
 	NV_INFO(drm, "MM: using %s for buffer copies\n", name);
 }
 
-static int
-nouveau_move_setup_tt(struct ttm_buffer_object *bo,
-		      struct ttm_operation_ctx *ctx,
-		      struct ttm_resource *tmp_reg)
-{
-	struct ttm_place placement_memtype = {
-		.fpfn = 0,
-		.lpfn = 0,
-		.mem_type = TTM_PL_TT,
-		.flags = 0
-	};
-	struct ttm_placement placement;
-	int ret;
-
-	placement.num_placement = placement.num_busy_placement = 1;
-	placement.placement = placement.busy_placement = &placement_memtype;
-
-	ret = ttm_bo_mem_space(bo, &placement, tmp_reg, ctx);
-	if (ret)
-		return ret;
-
-	ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
-	if (ret)
-		goto out_cleanup;
-
-	ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, tmp_reg);
-	if (ret)
-		goto out_cleanup;
-	return 0;
-out_cleanup:
-	ttm_resource_free(bo, tmp_reg);
-	return ret;
-}
-
-static int
-nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict,
-		      struct ttm_operation_ctx *ctx,
-		      struct ttm_resource *new_reg)
-{
-	struct ttm_resource tmp_reg;
-	int ret;
-
-	tmp_reg = *new_reg;
-	tmp_reg.mm_node = NULL;
-
-	ret = nouveau_move_setup_tt(bo, ctx, &tmp_reg);
-	if (ret)
-		return ret;
-
-	ret = nouveau_bo_move_m2mf(bo, true, ctx, &tmp_reg);
-	if (ret)
-		goto out;
-
-	ret = ttm_bo_wait_ctx(bo, ctx);
-	if (ret)
-		goto out;
-
-	nouveau_ttm_tt_unbind(bo->bdev, bo->ttm);
-	ttm_resource_free(bo, &bo->mem);
-	ttm_bo_assign_mem(bo, &tmp_reg);
-out:
-	ttm_resource_free(bo, &tmp_reg);
-	return ret;
-}
-
-static int
-nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict,
-		      struct ttm_operation_ctx *ctx,
-		      struct ttm_resource *new_reg)
-{
-	struct ttm_resource tmp_reg;
-	int ret;
-
-	tmp_reg = *new_reg;
-	tmp_reg.mm_node = NULL;
-
-	ret = nouveau_move_setup_tt(bo, ctx, &tmp_reg);
-	if (ret)
-		return ret;
-
-	ttm_bo_assign_mem(bo, &tmp_reg);
-	ret = nouveau_bo_move_m2mf(bo, true, ctx, new_reg);
-	ttm_resource_free(bo, &tmp_reg);
-	return ret;
-}
-
 static void
 nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
 		     struct ttm_resource *new_reg)
@@ -1028,6 +942,15 @@  nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
+	if ((old_reg->mem_type == TTM_PL_SYSTEM &&
+	     new_reg->mem_type == TTM_PL_VRAM) ||
+	    (old_reg->mem_type == TTM_PL_VRAM &&
+	     new_reg->mem_type == TTM_PL_SYSTEM)) {
+		new_reg->mem_type = TTM_PL_TT;
+		new_reg->placement = 0;
+		return -EMULTIHOP;
+	}
+	
 	if (new_reg->mem_type == TTM_PL_TT) {
 		ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg);
 		if (ret)
@@ -1070,15 +993,8 @@  nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
 
 	/* Hardware assisted copy. */
 	if (drm->ttm.move) {
-		if (new_reg->mem_type == TTM_PL_SYSTEM)
-			ret = nouveau_bo_move_flipd(bo, evict, ctx,
-						    new_reg);
-		else if (old_reg->mem_type == TTM_PL_SYSTEM)
-			ret = nouveau_bo_move_flips(bo, evict, ctx,
-						    new_reg);
-		else
-			ret = nouveau_bo_move_m2mf(bo, evict, ctx,
-						   new_reg);
+		ret = nouveau_bo_move_m2mf(bo, evict, ctx,
+					   new_reg);
 		if (!ret)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index efe5046e3268..d36321b699e0 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -207,94 +207,6 @@  static int radeon_move_blit(struct ttm_buffer_object *bo,
 	return r;
 }
 
-static int radeon_move_setup_tt(struct ttm_buffer_object *bo,
-				struct ttm_operation_ctx *ctx,
-				struct ttm_resource *tmp_mem)
-{
-	struct ttm_place placements;
-	struct ttm_placement placement;
-	int r;
-
-	placement.num_placement = 1;
-	placement.placement = &placements;
-	placement.num_busy_placement = 1;
-	placement.busy_placement = &placements;
-	placements.fpfn = 0;
-	placements.lpfn = 0;
-	placements.mem_type = TTM_PL_TT;
-	placements.flags = 0;
-
-	r = ttm_bo_mem_space(bo, &placement, tmp_mem, ctx);
-	if (unlikely(r))
-		return r;
-
-	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
-	if (unlikely(r))
-		goto out_cleanup;
-
-	r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, tmp_mem);
-	if (unlikely(r))
-		goto out_cleanup;
-	return 0;
-out_cleanup:
-	ttm_resource_free(bo, tmp_mem);
-	return r;
-}
-
-static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
-				bool evict,
-				struct ttm_operation_ctx *ctx,
-				struct ttm_resource *new_mem)
-{
-	struct ttm_resource *old_mem = &bo->mem;
-	struct ttm_resource tmp_mem;
-	int r;
-
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-
-	r = radeon_move_setup_tt(bo, ctx, &tmp_mem);
-	if (unlikely(r))
-		return r;
-
-	r = radeon_move_blit(bo, true, &tmp_mem, old_mem);
-	if (unlikely(r)) {
-		goto out_cleanup;
-	}
-	r = ttm_bo_wait_ctx(bo, ctx);
-	if (unlikely(r))
-		goto out_cleanup;
-
-	radeon_ttm_tt_unbind(bo->bdev, bo->ttm);
-	ttm_resource_free(bo, &bo->mem);
-	ttm_bo_assign_mem(bo, new_mem);
-out_cleanup:
-	ttm_resource_free(bo, &tmp_mem);
-	return r;
-}
-
-static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
-				bool evict,
-				struct ttm_operation_ctx *ctx,
-				struct ttm_resource *new_mem)
-{
-	struct ttm_resource *old_mem = &bo->mem;
-	struct ttm_resource tmp_mem;
-	int r;
-
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-
-	r = radeon_move_setup_tt(bo, ctx, &tmp_mem);
-	if (unlikely(r))
-		return r;
-
-	ttm_bo_assign_mem(bo, &tmp_mem);
-	r = radeon_move_blit(bo, true, new_mem, old_mem);
-	ttm_resource_free(bo, &tmp_mem);
-	return r;
-}
-
 static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 			  struct ttm_operation_ctx *ctx,
 			  struct ttm_resource *new_mem)
@@ -304,6 +216,15 @@  static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = &bo->mem;
 	int r;
 
+	if ((old_mem->mem_type == TTM_PL_SYSTEM &&
+	     new_mem->mem_type == TTM_PL_VRAM) ||
+	    (old_mem->mem_type == TTM_PL_VRAM &&
+	     new_mem->mem_type == TTM_PL_SYSTEM)) {
+		new_mem->mem_type = TTM_PL_TT;
+		new_mem->placement = 0;
+		return -EMULTIHOP;
+	}
+
 	if (new_mem->mem_type == TTM_PL_TT) {
 		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
 		if (r)
@@ -344,17 +265,8 @@  static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
 		goto memcpy;
 	}
 
-	if (old_mem->mem_type == TTM_PL_VRAM &&
-	    new_mem->mem_type == TTM_PL_SYSTEM) {
-		r = radeon_move_vram_ram(bo, evict, ctx, new_mem);
-	} else if (old_mem->mem_type == TTM_PL_SYSTEM &&
-		   new_mem->mem_type == TTM_PL_VRAM) {
-		r = radeon_move_ram_vram(bo, evict, ctx, new_mem);
-	} else {
-		r = radeon_move_blit(bo, evict,
-				     new_mem, old_mem);
-	}
-
+	r = radeon_move_blit(bo, evict,
+			     new_mem, old_mem);
 	if (r) {
 memcpy:
 		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9b2be4e44bea..f4a6d5f29be8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -260,8 +260,11 @@  static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 	}
 
 	ret = bdev->driver->move(bo, evict, ctx, mem);
-	if (ret)
+	if (ret) {
+		if (ret == -EMULTIHOP)
+			return ret;
 		goto out_err;
+	}
 
 	ctx->bytes_moved += bo->mem.num_pages << PAGE_SHIFT;
 	return 0;
@@ -936,6 +939,33 @@  int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
 
+static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
+				     struct ttm_resource *mem,
+				     struct ttm_operation_ctx *ctx)
+{
+	struct ttm_place placement_memtype = {
+		.fpfn = 0,
+		.lpfn = 0,
+		.mem_type = mem->mem_type,
+		.flags = mem->placement,
+	};
+	struct ttm_placement bounce_placement;
+	int ret;
+
+	bounce_placement.num_placement = bounce_placement.num_busy_placement = 1;
+	bounce_placement.placement = bounce_placement.busy_placement = &placement_memtype;
+
+	/* find space in the bounce domain */
+	ret = ttm_bo_mem_space(bo, &bounce_placement, mem, ctx);
+	if (ret)
+		return ret;
+	/* move to the bounce domain */
+	ret = ttm_bo_handle_move_mem(bo, mem, false, ctx);
+	if (ret)
+		return ret;
+	return 0;
+}
+
 static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 			      struct ttm_placement *placement,
 			      struct ttm_operation_ctx *ctx)
@@ -954,11 +984,18 @@  static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	/*
 	 * Determine where to move the buffer.
 	 */
+bounce:
 	ret = ttm_bo_mem_space(bo, placement, &mem, ctx);
 	if (ret)
-		goto out_unlock;
+		return ret;
 	ret = ttm_bo_handle_move_mem(bo, &mem, false, ctx);
-out_unlock:
+	if (ret == -EMULTIHOP) {
+		ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx);
+		if (ret)
+			return ret;
+		/* try and move to final place now. */
+		goto bounce;
+	}
 	if (ret)
 		ttm_resource_free(bo, &mem);
 	return ret;
@@ -1478,4 +1515,3 @@  void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
 	ttm_tt_destroy(bo->bdev, bo->ttm);
 	bo->ttm = NULL;
 }
-