diff mbox

[2/2] drm/ttm: optimize ttm_mem_evict_first v4

Message ID 20171113095409.1904-2-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König Nov. 13, 2017, 9:54 a.m. UTC
Deleted BOs with the same reservation object can be reaped even if they
can't be reserved.

v2: rebase and we still need to remove/add the BO from/to the LRU.
v3: fix remove/add one more time, cleanup the logic a bit
v4: we should still check if the eviction is valuable

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 54 +++++++++++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 21 deletions(-)

Comments

Chunming Zhou Nov. 14, 2017, 2:40 a.m. UTC | #1
On 2017年11月13日 17:54, Christian König wrote:
> Deleted BOs with the same reservation object can be reaped even if they
> can't be reserved.
>
> v2: rebase and we still need to remove/add the BO from/to the LRU.
> v3: fix remove/add one more time, cleanup the logic a bit
> v4: we should still check if the eviction is valuable
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 54 +++++++++++++++++++++++++++-----------------
>   1 file changed, 33 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 691646c0f8d3..7b1525d39ea8 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -738,47 +738,57 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>   EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>   
>   static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
> -				uint32_t mem_type,
> -				const struct ttm_place *place,
> -				bool interruptible,
> -				bool no_wait_gpu)
> +			       struct reservation_object *resv,
> +			       uint32_t mem_type,
> +			       const struct ttm_place *place,
> +			       bool interruptible,
> +			       bool no_wait_gpu)
>   {
>   	struct ttm_bo_global *glob = bdev->glob;
>   	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
> -	struct ttm_buffer_object *bo;
> -	int ret = -EBUSY;
> +	struct ttm_buffer_object *bo = NULL;
> +	bool locked = false;
>   	unsigned i;
> +	int ret;
>   
>   	spin_lock(&glob->lru_lock);
>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   		list_for_each_entry(bo, &man->lru[i], lru) {
> -			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
> -			if (ret)
> -				continue;
> +			if (bo->resv == resv) {
> +				if (list_empty(&bo->ddestroy))
> +					continue;
Do you have further patches for enabling eviction and swapout for 
allocation?

Regards,
David Zhou
> +
> +			} else {
> +				locked = reservation_object_trylock(bo->resv);
> +				if (!locked)
> +					continue;
> +			}
>   
>   			if (place && !bdev->driver->eviction_valuable(bo,
>   								      place)) {
> -				reservation_object_unlock(bo->resv);
> -				ret = -EBUSY;
> +				if (locked)
> +					reservation_object_unlock(bo->resv);
>   				continue;
>   			}
> -
>   			break;
>   		}
>   
> -		if (!ret)
> +		if (&bo->lru != &man->lru[i])
>   			break;
> +		else
> +			bo = NULL;
>   	}
>   
> -	if (ret) {
> +	if (!bo) {
>   		spin_unlock(&glob->lru_lock);
> -		return ret;
> +		return -EBUSY;
>   	}
>   
>   	kref_get(&bo->list_kref);
>   
>   	if (!list_empty(&bo->ddestroy)) {
> -		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu, true);
> +		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu,
> +					  locked);
>   		kref_put(&bo->list_kref, ttm_bo_release_list);
>   		return ret;
>   	}
> @@ -786,10 +796,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>   	ttm_bo_del_from_lru(bo);
>   	spin_unlock(&glob->lru_lock);
>   
> -	BUG_ON(ret != 0);
> -
>   	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
> -	ttm_bo_unreserve(bo);
> +	if (locked)
> +		ttm_bo_unreserve(bo);
> +	else
> +		ttm_bo_add_to_lru(bo);
>   
>   	kref_put(&bo->list_kref, ttm_bo_release_list);
>   	return ret;
> @@ -853,7 +864,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
>   			return ret;
>   		if (mem->mm_node)
>   			break;
> -		ret = ttm_mem_evict_first(bdev, mem_type, place,
> +		ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place,
>   					  interruptible, no_wait_gpu);
>   		if (unlikely(ret != 0))
>   			return ret;
> @@ -1356,7 +1367,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>   		while (!list_empty(&man->lru[i])) {
>   			spin_unlock(&glob->lru_lock);
> -			ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, false);
> +			ret = ttm_mem_evict_first(bdev, NULL, mem_type, NULL,
> +						  false, false);
>   			if (ret)
>   				return ret;
>   			spin_lock(&glob->lru_lock);
Christian König Nov. 14, 2017, 9:01 a.m. UTC | #2
Am 14.11.2017 um 03:40 schrieb Chunming Zhou:
>
>
> On 2017年11月13日 17:54, Christian König wrote:
>> Deleted BOs with the same reservation object can be reaped even if they
>> can't be reserved.
>>
>> v2: rebase and we still need to remove/add the BO from/to the LRU.
>> v3: fix remove/add one more time, cleanup the logic a bit
>> v4: we should still check if the eviction is valuable
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 54 
>> +++++++++++++++++++++++++++-----------------
>>   1 file changed, 33 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 691646c0f8d3..7b1525d39ea8 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -738,47 +738,57 @@ bool ttm_bo_eviction_valuable(struct 
>> ttm_buffer_object *bo,
>>   EXPORT_SYMBOL(ttm_bo_eviction_valuable);
>>     static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
>> -                uint32_t mem_type,
>> -                const struct ttm_place *place,
>> -                bool interruptible,
>> -                bool no_wait_gpu)
>> +                   struct reservation_object *resv,
>> +                   uint32_t mem_type,
>> +                   const struct ttm_place *place,
>> +                   bool interruptible,
>> +                   bool no_wait_gpu)
>>   {
>>       struct ttm_bo_global *glob = bdev->glob;
>>       struct ttm_mem_type_manager *man = &bdev->man[mem_type];
>> -    struct ttm_buffer_object *bo;
>> -    int ret = -EBUSY;
>> +    struct ttm_buffer_object *bo = NULL;
>> +    bool locked = false;
>>       unsigned i;
>> +    int ret;
>>         spin_lock(&glob->lru_lock);
>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>           list_for_each_entry(bo, &man->lru[i], lru) {
>> -            ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
>> -            if (ret)
>> -                continue;
>> +            if (bo->resv == resv) {
>> +                if (list_empty(&bo->ddestroy))
>> +                    continue;
> Do you have further patches for enabling eviction and swapout for 
> allocation?

Yeah, going to work on this.

Basically we need to add a context here so that TTM knows what to do. I 
had patches for this already done, but never pushed them because time 
constrains.

But that will probably a few weeks before I have that ready.

Regards,
Christian.

>
> Regards,
> David Zhou
>> +
>> +            } else {
>> +                locked = reservation_object_trylock(bo->resv);
>> +                if (!locked)
>> +                    continue;
>> +            }
>>                 if (place && !bdev->driver->eviction_valuable(bo,
>>                                         place)) {
>> -                reservation_object_unlock(bo->resv);
>> -                ret = -EBUSY;
>> +                if (locked)
>> +                    reservation_object_unlock(bo->resv);
>>                   continue;
>>               }
>> -
>>               break;
>>           }
>>   -        if (!ret)
>> +        if (&bo->lru != &man->lru[i])
>>               break;
>> +        else
>> +            bo = NULL;
>>       }
>>   -    if (ret) {
>> +    if (!bo) {
>>           spin_unlock(&glob->lru_lock);
>> -        return ret;
>> +        return -EBUSY;
>>       }
>>         kref_get(&bo->list_kref);
>>         if (!list_empty(&bo->ddestroy)) {
>> -        ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu, 
>> true);
>> +        ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu,
>> +                      locked);
>>           kref_put(&bo->list_kref, ttm_bo_release_list);
>>           return ret;
>>       }
>> @@ -786,10 +796,11 @@ static int ttm_mem_evict_first(struct 
>> ttm_bo_device *bdev,
>>       ttm_bo_del_from_lru(bo);
>>       spin_unlock(&glob->lru_lock);
>>   -    BUG_ON(ret != 0);
>> -
>>       ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
>> -    ttm_bo_unreserve(bo);
>> +    if (locked)
>> +        ttm_bo_unreserve(bo);
>> +    else
>> +        ttm_bo_add_to_lru(bo);
>>         kref_put(&bo->list_kref, ttm_bo_release_list);
>>       return ret;
>> @@ -853,7 +864,7 @@ static int ttm_bo_mem_force_space(struct 
>> ttm_buffer_object *bo,
>>               return ret;
>>           if (mem->mm_node)
>>               break;
>> -        ret = ttm_mem_evict_first(bdev, mem_type, place,
>> +        ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place,
>>                         interruptible, no_wait_gpu);
>>           if (unlikely(ret != 0))
>>               return ret;
>> @@ -1356,7 +1367,8 @@ static int ttm_bo_force_list_clean(struct 
>> ttm_bo_device *bdev,
>>       for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>           while (!list_empty(&man->lru[i])) {
>>               spin_unlock(&glob->lru_lock);
>> -            ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, 
>> false);
>> +            ret = ttm_mem_evict_first(bdev, NULL, mem_type, NULL,
>> +                          false, false);
>>               if (ret)
>>                   return ret;
>>               spin_lock(&glob->lru_lock);
>
Michel Dänzer Nov. 14, 2017, 10:02 a.m. UTC | #3
On 13/11/17 10:54 AM, Christian König wrote:
> Deleted BOs with the same reservation object can be reaped even if they
> can't be reserved.
> 
> v2: rebase and we still need to remove/add the BO from/to the LRU.
> v3: fix remove/add one more time, cleanup the logic a bit
> v4: we should still check if the eviction is valuable
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

[...]

> -			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
> -			if (ret)
> -				continue;
> +			if (bo->resv == resv) {
> +				if (list_empty(&bo->ddestroy))
> +					continue;
> +

Superfluous blank line here.


> +			} else {
> +				locked = reservation_object_trylock(bo->resv);
> +				if (!locked)
> +					continue;
> +			}
>  
>  			if (place && !bdev->driver->eviction_valuable(bo,
>  								      place)) {
> -				reservation_object_unlock(bo->resv);
> -				ret = -EBUSY;
> +				if (locked)
> +					reservation_object_unlock(bo->resv);

I think we need to always set bo = NULL before continue, otherwise we
can end up trying to evict the very last BO even though we didn't
consider it suitable for eviction (and if
bdev->driver->eviction_valuable returned false, with locked = true even
though we already unlocked bo->resv).


> -		if (!ret)
> +		if (&bo->lru != &man->lru[i])

I'm not sure what the purpose of this test is, can you explain?


>  			break;
> +		else
> +			bo = NULL;
>  	}
Christian König Nov. 14, 2017, 12:18 p.m. UTC | #4
Am 14.11.2017 um 11:02 schrieb Michel Dänzer:
> On 13/11/17 10:54 AM, Christian König wrote:
>> Deleted BOs with the same reservation object can be reaped even if they
>> can't be reserved.
>>
>> v2: rebase and we still need to remove/add the BO from/to the LRU.
>> v3: fix remove/add one more time, cleanup the logic a bit
>> v4: we should still check if the eviction is valuable
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> [...]
>
>> -			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
>> -			if (ret)
>> -				continue;
>> +			if (bo->resv == resv) {
>> +				if (list_empty(&bo->ddestroy))
>> +					continue;
>> +
> Superfluous blank line here.
>
>
>> +			} else {
>> +				locked = reservation_object_trylock(bo->resv);
>> +				if (!locked)
>> +					continue;
>> +			}
>>   
>>   			if (place && !bdev->driver->eviction_valuable(bo,
>>   								      place)) {
>> -				reservation_object_unlock(bo->resv);
>> -				ret = -EBUSY;
>> +				if (locked)
>> +					reservation_object_unlock(bo->resv);
> I think we need to always set bo = NULL before continue, otherwise we
> can end up trying to evict the very last BO even though we didn't
> consider it suitable for eviction (and if
> bdev->driver->eviction_valuable returned false, with locked = true even
> though we already unlocked bo->resv).
>
>
>> -		if (!ret)
>> +		if (&bo->lru != &man->lru[i])
> I'm not sure what the purpose of this test is, can you explain?

It prevents the case you described above.

bo is the element variable of the loop, so we can't set it to NULL 
inside the loop.

Instead we check after the loop if we reached the end of the list, if 
yes we set the bo variable to NULL, otherwise we have found a valid 
entry and can break out of the for loop.

To make it clearer we could also test "i" after the outer loop instead 
of bo for being NULL. That would at least be easier to understand I think.

Christian.

>
>
>>   			break;
>> +		else
>> +			bo = NULL;
>>   	}
>
Michel Dänzer Nov. 14, 2017, 3:35 p.m. UTC | #5
On 14/11/17 01:18 PM, Christian König wrote:
> Am 14.11.2017 um 11:02 schrieb Michel Dänzer:
>> On 13/11/17 10:54 AM, Christian König wrote:
>>>
>>> +            } else {
>>> +                locked = reservation_object_trylock(bo->resv);
>>> +                if (!locked)
>>> +                    continue;
>>> +            }
>>>                 if (place && !bdev->driver->eviction_valuable(bo,
>>>                                         place)) {
>>> -                reservation_object_unlock(bo->resv);
>>> -                ret = -EBUSY;
>>> +                if (locked)
>>> +                    reservation_object_unlock(bo->resv);
>> I think we need to always set bo = NULL before continue, otherwise we
>> can end up trying to evict the very last BO even though we didn't
>> consider it suitable for eviction (and if
>> bdev->driver->eviction_valuable returned false, with locked = true even
>> though we already unlocked bo->resv).
>>
>>
>>> -        if (!ret)
>>> +        if (&bo->lru != &man->lru[i])
>> I'm not sure what the purpose of this test is, can you explain?
> 
> It prevents the case you described above.
> 
> bo is the element variable of the loop, so we can't set it to NULL
> inside the loop.

Yeah, that was a brainfart. :)


> Instead we check after the loop if we reached the end of the list, if
> yes we set the bo variable to NULL, otherwise we have found a valid
> entry and can break out of the for loop.
>
> To make it clearer we could also test "i" after the outer loop instead
> of bo for being NULL. That would at least be easier to understand I think.

That would still require detecting early termination of the inner loop
and breaking out of the outer loop, so it doesn't buy anything.

The clearest solution might be:

	for (i = 0; !bo && i < TTM_MAX_BO_PRIORITY; ++i) {
		struct ttm_buffer_object *candidate;

		list_for_each_entry(candidate, &man->lru[i], lru) {
			[if/continue using candidate instead of bo]

			bo = candidate;
			break;
		}
	}

	if (!bo) {
		[...]


Alternatively, this might clarify the test for early termination:

		/* If the inner loop terminated early, we have our candidate */
		if (&bo->lru != &man->lru[i])
 			break;

		bo = NULL;
	}

(could even move the bo = NULL to the end of the outer loop's for ()
line)


I'll leave it up to you which you choose, either way

Reviewed-and-Tested-by: Michel Dänzer <michel.daenzer@amd.com>
diff mbox

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 691646c0f8d3..7b1525d39ea8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -738,47 +738,57 @@  bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 EXPORT_SYMBOL(ttm_bo_eviction_valuable);
 
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
-				uint32_t mem_type,
-				const struct ttm_place *place,
-				bool interruptible,
-				bool no_wait_gpu)
+			       struct reservation_object *resv,
+			       uint32_t mem_type,
+			       const struct ttm_place *place,
+			       bool interruptible,
+			       bool no_wait_gpu)
 {
 	struct ttm_bo_global *glob = bdev->glob;
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
-	struct ttm_buffer_object *bo;
-	int ret = -EBUSY;
+	struct ttm_buffer_object *bo = NULL;
+	bool locked = false;
 	unsigned i;
+	int ret;
 
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
-			ret = reservation_object_trylock(bo->resv) ? 0 : -EBUSY;
-			if (ret)
-				continue;
+			if (bo->resv == resv) {
+				if (list_empty(&bo->ddestroy))
+					continue;
+
+			} else {
+				locked = reservation_object_trylock(bo->resv);
+				if (!locked)
+					continue;
+			}
 
 			if (place && !bdev->driver->eviction_valuable(bo,
 								      place)) {
-				reservation_object_unlock(bo->resv);
-				ret = -EBUSY;
+				if (locked)
+					reservation_object_unlock(bo->resv);
 				continue;
 			}
-
 			break;
 		}
 
-		if (!ret)
+		if (&bo->lru != &man->lru[i])
 			break;
+		else
+			bo = NULL;
 	}
 
-	if (ret) {
+	if (!bo) {
 		spin_unlock(&glob->lru_lock);
-		return ret;
+		return -EBUSY;
 	}
 
 	kref_get(&bo->list_kref);
 
 	if (!list_empty(&bo->ddestroy)) {
-		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu, true);
+		ret = ttm_bo_cleanup_refs(bo, interruptible, no_wait_gpu,
+					  locked);
 		kref_put(&bo->list_kref, ttm_bo_release_list);
 		return ret;
 	}
@@ -786,10 +796,11 @@  static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-	BUG_ON(ret != 0);
-
 	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
-	ttm_bo_unreserve(bo);
+	if (locked)
+		ttm_bo_unreserve(bo);
+	else
+		ttm_bo_add_to_lru(bo);
 
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 	return ret;
@@ -853,7 +864,7 @@  static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, place,
+		ret = ttm_mem_evict_first(bdev, bo->resv, mem_type, place,
 					  interruptible, no_wait_gpu);
 		if (unlikely(ret != 0))
 			return ret;
@@ -1356,7 +1367,8 @@  static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_mem_evict_first(bdev, mem_type, NULL, false, false);
+			ret = ttm_mem_evict_first(bdev, NULL, mem_type, NULL,
+						  false, false);
 			if (ret)
 				return ret;
 			spin_lock(&glob->lru_lock);