[02/11] drm/ttm: fix busy memory to fail other user v8

Message ID: 20190514123127.1650-2-christian.koenig@amd.com
State: New, archived
Series: [01/11] drm/ttm: Make LRU removal optional.

Commit Message

Christian König May 14, 2019, 12:31 p.m. UTC
From: Chunming Zhou <david1.zhou@amd.com>

A heavy GPU job can occupy memory for a long time, which leads other users to fail to get memory.

This basically picks up Christian's idea (see the sketch below the diffstat):

1. Reserve the BO in DC using a ww_mutex ticket (trivial).
2. If we then run into this EBUSY condition in TTM, check if the BO we need memory for (or rather the ww_mutex of its reservation object) has a ticket assigned.
3. If we have a ticket, we grab a reference to the first BO on the LRU, drop the LRU lock, and try to grab the reservation lock with the ticket.
4. If getting the reservation lock with the ticket succeeded, we check if the BO is still the first one on the LRU in question (the BO could have moved).
5. If the BO is still the first one on the LRU in question, we try to evict it as we would evict any other BO.
6. If any of the "if"s above fail, we just back off and return -EBUSY.

v2: fix some minor checks
v3: address Christian's v2 comments.
v4: fix some missing
v5: handle first_bo unlock and bo_get/put
v6: abstract a unified iterate function and handle all possible use cases, not only pinned BOs.
v7: pass the request bo->resv to ttm_mem_evict_first
v8 (chk): minimal coding style fixes

Change-Id: I21423fb922f885465f13833c41df1e134364a8e7
Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 113 +++++++++++++++++++++++++++++------
 1 file changed, 96 insertions(+), 17 deletions(-)
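
For illustration, here is a minimal sketch of the flow in steps 1-6. It is not the patch itself: find_first_busy_bo() is a hypothetical placeholder for the first-busy-BO bookkeeping, and the real implementation is ttm_mem_evict_first() in the diff at the bottom of this page.

/* Sketch only. Assumes the requesting BO's reservation object carries
 * a ww_acquire_ctx ticket; find_first_busy_bo() is a hypothetical
 * stand-in, not an actual TTM function. */
static int ticketed_evict_sketch(struct ttm_buffer_object *req_bo)
{
	struct ww_acquire_ctx *ticket = req_bo->resv->lock.ctx;
	struct ttm_buffer_object *busy_bo;
	int ret;

	if (!ticket)
		return -EBUSY;	/* no ticket assigned: back off (step 6) */

	busy_bo = find_first_busy_bo();	/* step 3: grab a reference */
	if (!busy_bo || busy_bo->resv == req_bo->resv)
		return -EBUSY;

	/* step 3: the LRU lock is already dropped here, so we may block
	 * on the reservation lock with the ticket */
	ret = ww_mutex_lock(&busy_bo->resv->lock, ticket);
	if (ret)
		return ret;	/* e.g. -EDEADLK: caller backs off */

	/* steps 4 and 5: re-check the LRU and evict as usual */
	return 0;
}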

Comments

Daniel Vetter May 15, 2019, 8:38 a.m. UTC | #1
On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
> [snip commit message]

I think this closes a big gap between ttm and the bkl/struct_mutex
drivers - it's much easier to guarantee you can evict everything if
there's only a single lock :-)

Would be absolutely awesome if we could extract this as some kind of
building block, like we've done with lots of other ttm concepts already
(reservation_obj, fences, ...).

Just an aside really.
-Daniel
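
As background for the "ticket" terminology used in this thread: the ticket is a ww_acquire_ctx from the kernel's wound/wait mutex API. A minimal sketch of the generic acquire pattern follows; my_class, struct obj and lock_both() are illustrative names, not TTM code.

static DEFINE_WW_CLASS(my_class);

struct obj {
	struct ww_mutex lock;
};

/* Take two ww_mutexes in arbitrary order. On -EDEADLK we lost the
 * wait/die arbitration, so drop what we hold and retry with the
 * contended lock taken first. The first lock taken under a context
 * never returns -EDEADLK because nothing is held yet. */
static void lock_both(struct obj *a, struct obj *b)
{
	struct ww_acquire_ctx ctx;
	struct obj *first = a, *second = b;

	ww_acquire_init(&ctx, &my_class);
	for (;;) {
		ww_mutex_lock(&first->lock, &ctx);
		if (ww_mutex_lock(&second->lock, &ctx) != -EDEADLK)
			break;
		ww_mutex_unlock(&first->lock);
		swap(first, second);	/* take the contended lock first */
	}
	ww_acquire_done(&ctx);

	/* ... critical section over both objects ... */

	ww_mutex_unlock(&a->lock);
	ww_mutex_unlock(&b->lock);
	ww_acquire_fini(&ctx);
}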

> [snip patch]
Daniel Vetter May 15, 2019, 8:45 a.m. UTC | #2
On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
> > [snip commit message]
> 
> I think this closes a big gap between ttm and the bkl/struct_mutex
> drivers - it's much easier to guarantee you can evict everything if
> there's only a single lock :-)
> 
> Would be absolutely awesome if we could extract this as some kind of
> building block, like we've done with lots of other ttm concepts already
> (reservation_obj, fences, ...).
> 
> Just an aside really.

Of course this is meant as a comment on the entire patch series; without all the
other patches to make sure BOs always stay on a relevant LRU, there are still
gaps in the guaranteed-forward-progress eviction algorithm.
-Daniel

> -Daniel
> 
> > [snip patch]
Christian König May 15, 2019, 9:27 a.m. UTC | #3
On 15.05.19 at 10:45, Daniel Vetter wrote:
> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>> [snip commit message]
>> I think this closes a big gap between ttm and the bkl/struct_mutex
>> drivers - it's much easier to guarantee you can evict everything if
>> there's only a single lock :-)
>>
>> Would be absolutely awesome if we could extract this as some kind of
>> building block, like we've done with lots of other ttm concepts already
>> (reservation_obj, fences, ...).
>>
>> Just an aside really.
> Of course this is meant as a comment on the entire patch series; without all
> the other patches to make sure BOs always stay on a relevant LRU, there are
> still gaps in the guaranteed-forward-progress eviction algorithm.

Yeah, the problem surfaced because of patch #4. Previously TTM would
have just ignored all errors, continued to try different placements,
and only returned -ENOMEM when we ran out of possible placements.

I probably need to either fix patch #4 or reorder the patches.

Thanks for the note,
Christian.
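
(For context, a rough sketch of the pre-series behaviour described above. try_placement() is a hypothetical helper, not the literal TTM loop.)

/* Old behaviour, roughly: errors from individual placements were
 * swallowed and the next placement tried; only after exhausting every
 * placement did the allocation give up with -ENOMEM. */
static int old_alloc_sketch(struct ttm_buffer_object *bo,
			    struct ttm_placement *placement)
{
	unsigned i;

	for (i = 0; i < placement->num_placement; ++i) {
		if (try_placement(bo, &placement->placement[i]) == 0)
			return 0;	/* found space at this placement */
		/* eviction error (e.g. -EBUSY) ignored: try the next */
	}
	return -ENOMEM;		/* ran out of possible placements */
}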

> -Daniel
>
>> -Daniel
>>
>>> [snip patch]
Christian König May 15, 2019, 9:28 a.m. UTC | #4
On 15.05.19 at 11:27, Christian König wrote:
> On 15.05.19 at 10:45, Daniel Vetter wrote:
>> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>>> [snip commit message]
>>> I think this closes a big gap between ttm and the bkl/struct_mutex
>>> drivers - it's much easier to guarantee you can evict everything if
>>> there's only a single lock :-)
>>>
>>> Would be absolutely awesome if we could extract this as some kind of
>>> building block, like we've done with lots of other ttm concepts already
>>> (reservation_obj, fences, ...).
>>>
>>> Just an aside really.
>> Of course this is meant as a comment on the entire patch series; without
>> all the other patches to make sure BOs always stay on a relevant LRU,
>> there are still gaps in the guaranteed-forward-progress eviction
>> algorithm.
>
> Yeah, the problem surfaced because of patch #4. Previously TTM would
> have just ignored all errors, continued to try different placements,
> and only returned -ENOMEM when we ran out of possible placements.
>
> I probably need to either fix patch #4 or reorder the patches.

Oops, please ignore. I accidentally replied to the wrong mail.

Christian.

>
> Thanks for the note,
> Christian.
>
>> -Daniel
>>
>>> -Daniel
>>>
>>>> [snip patch]
Christian König May 15, 2019, 9:30 a.m. UTC | #5
On 15.05.19 at 10:45, Daniel Vetter wrote:
> On Wed, May 15, 2019 at 10:38:28AM +0200, Daniel Vetter wrote:
>> On Tue, May 14, 2019 at 02:31:18PM +0200, Christian König wrote:
>>> [snip commit message]
>> I think this closes a big gap between ttm and the bkl/struct_mutex
>> drivers - it's much easier to guarantee you can evict everything if
>> there's only a single lock :-)
>>
>> Would be absolutely awesome if we could extract this as some kind of
>> building block, like we've done with lots of other ttm concepts already
>> (reservation_obj, fences, ...).
>>
>> Just an aside really.
> Of course this is meant as a comment on the entire patch series; without all
> the other patches to make sure BOs always stay on a relevant LRU, there are
> still gaps in the guaranteed-forward-progress eviction algorithm.

Yeah, and especially the fact that Marek ran into a bad in-kernel deadlock
is a serious no-go for the moment.

We need to figure out what exactly is going wrong here first, but in
general I completely agree that we should move this logic out of TTM.

Christian.

> -Daniel
>
>> -Daniel
>>
>>> [snip patch]
Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..e634d3a36923 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -766,11 +766,13 @@  EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  * b. Otherwise, trylock it.
  */
 static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-			struct ttm_operation_ctx *ctx, bool *locked)
+			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
 {
 	bool ret = false;
 
 	*locked = false;
+	if (busy)
+		*busy = false;
 	if (bo->resv == ctx->resv) {
 		reservation_object_assert_held(bo->resv);
 		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
@@ -779,35 +781,46 @@  static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
 	} else {
 		*locked = reservation_object_trylock(bo->resv);
 		ret = *locked;
+		if (!ret && busy)
+			*busy = true;
 	}
 
 	return ret;
 }
 
-static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-			       uint32_t mem_type,
-			       const struct ttm_place *place,
-			       struct ttm_operation_ctx *ctx)
+static struct ttm_buffer_object*
+ttm_mem_find_evitable_bo(struct ttm_bo_device *bdev,
+			 struct ttm_mem_type_manager *man,
+			 const struct ttm_place *place,
+			 struct ttm_operation_ctx *ctx,
+			 struct ttm_buffer_object **first_bo,
+			 bool *locked)
 {
-	struct ttm_bo_global *glob = bdev->glob;
-	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo = NULL;
-	bool locked = false;
-	unsigned i;
-	int ret;
+	int i;
 
-	spin_lock(&glob->lru_lock);
+	if (first_bo)
+		*first_bo = NULL;
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+			bool busy = false;
+
+			if (!ttm_bo_evict_swapout_allowable(bo, ctx, locked,
+							    &busy)) {
+				if (first_bo && !(*first_bo) && busy) {
+					ttm_bo_get(bo);
+					*first_bo = bo;
+				}
 				continue;
+			}
 
 			if (place && !bdev->driver->eviction_valuable(bo,
 								      place)) {
-				if (locked)
+				if (*locked)
 					reservation_object_unlock(bo->resv);
 				continue;
 			}
+
 			break;
 		}
 
@@ -818,9 +831,69 @@  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 		bo = NULL;
 	}
 
+	return bo;
+}
+
+static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
+			       uint32_t mem_type,
+			       const struct ttm_place *place,
+			       struct ttm_operation_ctx *ctx,
+			       struct reservation_object *request_resv)
+{
+	struct ttm_bo_global *glob = bdev->glob;
+	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	struct ttm_buffer_object *bo = NULL, *first_bo = NULL;
+	bool locked = false;
+	int ret;
+
+	spin_lock(&glob->lru_lock);
+	bo = ttm_mem_find_evitable_bo(bdev, man, place, ctx, &first_bo,
+				      &locked);
 	if (!bo) {
+		struct ww_acquire_ctx *acquire_ctx = request_resv->lock.ctx;
+		struct ttm_operation_ctx busy_ctx;
+
 		spin_unlock(&glob->lru_lock);
-		return -EBUSY;
+		/* check if other user occupy memory too long time */
+		if (!first_bo || !request_resv || !request_resv->lock.ctx) {
+			if (first_bo)
+				ttm_bo_put(first_bo);
+			return -EBUSY;
+		}
+		if (first_bo->resv == request_resv) {
+			ttm_bo_put(first_bo);
+			return -EBUSY;
+		}
+		if (ctx->interruptible)
+			ret = ww_mutex_lock_interruptible(&first_bo->resv->lock,
+							  acquire_ctx);
+		else
+			ret = ww_mutex_lock(&first_bo->resv->lock,
+					    acquire_ctx);
+		if (ret) {
+			ttm_bo_put(first_bo);
+			return ret;
+		}
+		spin_lock(&glob->lru_lock);
+		/* previous busy resv lock is held by above, idle now,
+		 * so let them evictable.
+		 */
+		busy_ctx.interruptible = ctx->interruptible;
+		busy_ctx.no_wait_gpu   = ctx->no_wait_gpu;
+		busy_ctx.resv	       = first_bo->resv;
+		busy_ctx.flags	       = TTM_OPT_FLAG_ALLOW_RES_EVICT;
+
+		bo = ttm_mem_find_evitable_bo(bdev, man, place, &busy_ctx, NULL,
+					      &locked);
+		if (bo && (bo->resv == first_bo->resv))
+			locked = true;
+		else if (bo)
+			ww_mutex_unlock(&first_bo->resv->lock);
+		if (!bo) {
+			spin_unlock(&glob->lru_lock);
+			ttm_bo_put(first_bo);
+			return -EBUSY;
+		}
 	}
 
 	kref_get(&bo->list_kref);
@@ -829,11 +902,15 @@  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 		ret = ttm_bo_cleanup_refs(bo, ctx->interruptible,
 					  ctx->no_wait_gpu, locked);
 		kref_put(&bo->list_kref, ttm_bo_release_list);
+		if (first_bo)
+			ttm_bo_put(first_bo);
 		return ret;
 	}
 
 	ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
+	if (first_bo)
+		ttm_bo_put(first_bo);
 
 	ret = ttm_bo_evict(bo, ctx);
 	if (locked) {
@@ -907,7 +984,7 @@  static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
+		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx, bo->resv);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
@@ -1401,7 +1478,8 @@  static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
+						  NULL);
 			if (ret)
 				return ret;
 			spin_lock(&glob->lru_lock);
@@ -1772,7 +1850,8 @@  int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
+			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+							   NULL)) {
 				ret = 0;
 				break;
 			}