
[2/2] drm/amdgpu: stop bookkeeping of temporary GTT allocation

Message ID 20210527013051.4302-2-Lang.Yu@amd.com (mailing list archive)
State New, archived
Series [1/2] drm/ttm: cleanup and add TTM_PL_FLAG_TEMPORARY

Commit Message

Lang Yu May 27, 2021, 1:30 a.m. UTC
To improve buffer migration performance, stop bookkeeping of
temporary GTT allocations, including allocations for BOs evicted
from VRAM and for bounce buffers.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 16 ++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  4 +++-
 2 files changed, 13 insertions(+), 7 deletions(-)

Comments

Christian König May 27, 2021, 11:51 a.m. UTC | #1
Putting Marek on CC.

Am 27.05.21 um 03:30 schrieb Lang Yu:
> To improve buffer migration performance, stop bookkeeping of
> temporary GTT allocations, including allocations for BOs evicted
> from VRAM and for bounce buffers.
>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 16 ++++++++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  4 +++-
>   2 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> index 8860545344c7..32fedd495c7f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
> @@ -111,14 +111,15 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
>   	struct amdgpu_gtt_node *node;
>   	int r;
>   
> -	spin_lock(&mgr->lock);
> -	if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
> -	    atomic64_read(&mgr->available) < mem->num_pages) {
> +	if (!(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
> +		spin_lock(&mgr->lock);
> +		if (atomic64_read(&mgr->available) < mem->num_pages) {
> +			spin_unlock(&mgr->lock);
> +			return -ENOSPC;
> +		}
> +		atomic64_sub(mem->num_pages, &mgr->available);

After sleeping a night over that I think we need to talk about this part 
here once more.

While temporary GTT allocations can temporarily exceed the GTT limitation,
we still need to account for them in case the eviction is interrupted
for some reason.

In other words what can happen is that we want to move 
VRAM->GTT->SYSTEM, but GTT->SYSTEM never happens because it is 
interrupted in the wait (that's unfortunately rather likely).

To solve this, I think we should do the following:
1. Change mgr->available into mgr->used (i.e. invert the value).
2. Always account all GTT BOs to the used space.
3. Only bail out when it is not a temporary allocation.

This way temporary allocations are accounted for, but we still allow 
memory evictions to happen under pressure.

While at it you could also drop taking the spinlock to check the atomic, 
that is pretty much unnecessary.
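
For the allocation path that could boil down to something like this (just a
sketch of the idea, not the actual follow-up patch, and it assumes
mgr->available has been turned into a mgr->used counter):

	/* Always account the pages, temporary allocations included. */
	if (atomic64_add_return(mem->num_pages, &mgr->used) > man->size &&
	    !(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
		/* Only non-temporary allocations bail out over the limit. */
		atomic64_sub(mem->num_pages, &mgr->used);
		return -ENOSPC;
	}

The atomic64_add_return() already gives a consistent value to check against,
so the spinlock around the check can go away.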

Regards,
Christian.

>   		spin_unlock(&mgr->lock);
> -		return -ENOSPC;
>   	}
> -	atomic64_sub(mem->num_pages, &mgr->available);
> -	spin_unlock(&mgr->lock);
>   
>   	if (!place->lpfn) {
>   		mem->mm_node = NULL;
> @@ -178,6 +179,9 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
>   		kfree(node);
>   	}
>   
> +	if (mem->placement & TTM_PL_FLAG_TEMPORARY)
> +		return;
> +
>   	atomic64_add(mem->num_pages, &mgr->available);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c0aef327292a..129d39392859 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -152,9 +152,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>   			abo->placements[0].lpfn = 0;
>   			abo->placement.busy_placement = &abo->placements[1];
>   			abo->placement.num_busy_placement = 1;
> +			abo->placements[1].flags |= TTM_PL_FLAG_TEMPORARY;
>   		} else {
>   			/* Move to GTT memory */
>   			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
> +			abo->placements[0].flags |= TTM_PL_FLAG_TEMPORARY;
>   		}
>   		break;
>   	case TTM_PL_TT:
> @@ -538,7 +540,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   			hop->fpfn = 0;
>   			hop->lpfn = 0;
>   			hop->mem_type = TTM_PL_TT;
> -			hop->flags = 0;
> +			hop->flags |= TTM_PL_FLAG_TEMPORARY;
>   			return -EMULTIHOP;
>   		}
>
Lang Yu May 28, 2021, 9:47 a.m. UTC | #2
[AMD Official Use Only]


Inline.

>-----Original Message-----
>From: Koenig, Christian <Christian.Koenig@amd.com>
>Sent: Thursday, May 27, 2021 7:51 PM
>To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org; dri-
>devel@lists.freedesktop.org
>Cc: Huang, Ray <Ray.Huang@amd.com>; Deucher, Alexander
><Alexander.Deucher@amd.com>; Olsak, Marek <Marek.Olsak@amd.com>
>Subject: Re: [PATCH 2/2] drm/amdgpu: stop bookkeeping of temporary GTT
>allocation
>
>Putting Marek on CC.
>
>Am 27.05.21 um 03:30 schrieb Lang Yu:
>> To improve buffer migration performance, stop bookkeeping of temporary
>> GTT allocations, including allocations for BOs evicted from VRAM and
>> for bounce buffers.
>>
>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 16 ++++++++++------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  4 +++-
>>   2 files changed, 13 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> index 8860545344c7..32fedd495c7f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>> @@ -111,14 +111,15 @@ static int amdgpu_gtt_mgr_new(struct
>ttm_resource_manager *man,
>>   	struct amdgpu_gtt_node *node;
>>   	int r;
>>
>> -	spin_lock(&mgr->lock);
>> -	if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
>> -	    atomic64_read(&mgr->available) < mem->num_pages) {
>> +	if (!(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
>> +		spin_lock(&mgr->lock);
>> +		if (atomic64_read(&mgr->available) < mem->num_pages) {
>> +			spin_unlock(&mgr->lock);
>> +			return -ENOSPC;
>> +		}
>> +		atomic64_sub(mem->num_pages, &mgr->available);
>
>After sleeping a night over that I think we need to talk about this part here once
>more.
>
>While temporary GTT allocations can temporarily exceed the GTT limitation, we
>still need to account for them in case the eviction is interrupted for some reason.
>
>In other words what can happen is that we want to move
>VRAM->GTT->SYSTEM, but GTT->SYSTEM never happens because it is
>interrupted in the wait (that's unfortunately rather likely).
>
>To solve this, I think we should do the following:
>1. Change mgr->available into mgr->used (i.e. invert the value).
>2. Always account all GTT BOs to the used space.
>3. Only bail out when it is not a temporary allocation.
>
>This way temporary allocations are accounted for, but we still allow
>memory evictions to happen under pressure.
>
>While at it you could also drop taking the spinlock to check the atomic,
>that is pretty much unnecessary.
>
>Regards,
>Christian.
>
[Yu, Lang] Hi Christian,

Yes, it can actually happen that the BO was evicted from VRAM to the GTT domain,
but was not moved forward to the SYSTEM domain. It then resides in the GTT domain
waiting for the next validation, eviction or destruction.

It is reasonable that we count all GTT allocations.
1. I find that if a temporary GTT BO is not counted but is used for command submission,
then we can use more GTT memory than the GTT limit for command submission. Is that your concern?
2. Or is the concern that not counting temporary GTT allocations will mess up the GTT manager?

In other words, if we don't count it while it resides in the GTT domain, what is the consequence?
I would like to know your concern. It is actually counted by ttm_pages_allocated.

If we use "used" instead of "available" in the GTT manager, the used size may exceed the manager size.
We should also update the GTT manager debug interface accordingly.

Rework the logic like this with your idea:

	if (atomic64_add_return(mem->num_pages, &mgr->used) > man->size &&
	    !(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
		atomic64_sub(mem->num_pages, &mgr->used);
		return -ENOSPC;
	}
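
The free side in amdgpu_gtt_mgr_del() would then drop the temporary early
return and always give the pages back, roughly (same assumption that
mgr->available becomes mgr->used):

	/* Every allocation was accounted in mgr->used, so always release it. */
	atomic64_sub(mem->num_pages, &mgr->used);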

Regards,
Lang

>>   		spin_unlock(&mgr->lock);
>> -		return -ENOSPC;
>>   	}
>> -	atomic64_sub(mem->num_pages, &mgr->available);
>> -	spin_unlock(&mgr->lock);
>>
>>   	if (!place->lpfn) {
>>   		mem->mm_node = NULL;
>> @@ -178,6 +179,9 @@ static void amdgpu_gtt_mgr_del(struct
>ttm_resource_manager *man,
>>   		kfree(node);
>>   	}
>>
>> +	if (mem->placement & TTM_PL_FLAG_TEMPORARY)
>> +		return;
>> +
>>   	atomic64_add(mem->num_pages, &mgr->available);
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c0aef327292a..129d39392859 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -152,9 +152,11 @@ static void amdgpu_evict_flags(struct
>ttm_buffer_object *bo,
>>   			abo->placements[0].lpfn = 0;
>>   			abo->placement.busy_placement = &abo-
>>placements[1];
>>   			abo->placement.num_busy_placement = 1;
>> +			abo->placements[1].flags |=
>TTM_PL_FLAG_TEMPORARY;
>>   		} else {
>>   			/* Move to GTT memory */
>>   			amdgpu_bo_placement_from_domain(abo,
>AMDGPU_GEM_DOMAIN_GTT);
>> +			abo->placements[0].flags |=
>TTM_PL_FLAG_TEMPORARY;
>>   		}
>>   		break;
>>   	case TTM_PL_TT:
>> @@ -538,7 +540,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object
>*bo, bool evict,
>>   			hop->fpfn = 0;
>>   			hop->lpfn = 0;
>>   			hop->mem_type = TTM_PL_TT;
>> -			hop->flags = 0;
>> +			hop->flags |= TTM_PL_FLAG_TEMPORARY;
>>   			return -EMULTIHOP;
>>   		}
>>
Christian König May 28, 2021, 12:23 p.m. UTC | #3
Am 28.05.21 um 11:47 schrieb Yu, Lang:
> [AMD Official Use Only]
>
>
> Inline.
>
>> -----Original Message-----
>> From: Koenig, Christian <Christian.Koenig@amd.com>
>> Sent: Thursday, May 27, 2021 7:51 PM
>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org; dri-
>> devel@lists.freedesktop.org
>> Cc: Huang, Ray <Ray.Huang@amd.com>; Deucher, Alexander
>> <Alexander.Deucher@amd.com>; Olsak, Marek <Marek.Olsak@amd.com>
>> Subject: Re: [PATCH 2/2] drm/amdgpu: stop bookkeeping of temporary GTT
>> allocation
>>
>> Putting Marek on CC.
>>
>> Am 27.05.21 um 03:30 schrieb Lang Yu:
>>> To improve buffer migration performance, stop bookkeeping of temporary
>>> GTT allocations, including allocations for BOs evicted from VRAM and
>>> for bounce buffers.
>>>
>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 16 ++++++++++------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  4 +++-
>>>    2 files changed, 13 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>>> index 8860545344c7..32fedd495c7f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
>>> @@ -111,14 +111,15 @@ static int amdgpu_gtt_mgr_new(struct
>> ttm_resource_manager *man,
>>>    	struct amdgpu_gtt_node *node;
>>>    	int r;
>>>
>>> -	spin_lock(&mgr->lock);
>>> -	if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
>>> -	    atomic64_read(&mgr->available) < mem->num_pages) {
>>> +	if (!(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
>>> +		spin_lock(&mgr->lock);
>>> +		if (atomic64_read(&mgr->available) < mem->num_pages) {
>>> +			spin_unlock(&mgr->lock);
>>> +			return -ENOSPC;
>>> +		}
>>> +		atomic64_sub(mem->num_pages, &mgr->available);
>> After sleeping a night over that I think we need to talk about this part here once
>> more.
>>
>> While temporary GTT allocations can temporarily exceed the GTT limitation, we
>> still need to account for them in case the eviction is interrupted for some reason.
>>
>> In other words what can happen is that we want to move
>> VRAM->GTT->SYSTEM, but GTT->SYSTEM never happens because it is
>> interrupted in the wait (that's unfortunately rather likely).
>>
>> To solve this, I think we should do the following:
>> 1. Change mgr->available into mgr->used (i.e. invert the value).
>> 2. Always account all GTT BOs to the used space.
>> 3. Only bail out when it is not a temporary allocation.
>>
>> This way temporary allocations are accounted for, but we still allow
>> memory evictions to happen under pressure.
>>
>> While at it you could also drop taking the spinlock to check the atomic,
>> that is pretty much unnecessary.
>>
>> Regards,
>> Christian.
>>
> [Yu, Lang] Hi Christian,
>
> Yes, it can actually happen that the BO was evicted from VRAM to the GTT domain,
> but was not moved forward to the SYSTEM domain. It then resides in the GTT domain
> waiting for the next validation, eviction or destruction.
>
> It is reasonable that we count all GTT allocations.
> 1. I find that if a temporary GTT BO is not counted but is used for command submission,
> then we can use more GTT memory than the GTT limit for command submission. Is that your concern?

Yes, exactly that.

> 2. Or is the concern that not counting temporary GTT allocations will mess up the GTT manager?
>
> In other words, if we don't count it while it resides in the GTT domain, what is the consequence?

The GTT size is the limit on how much system memory userspace can
intentionally allocate.

This works around stupid applications which tend to allocate as much
memory as possible (without actually needing that much) and then
trigger the OOM killer.

> I would like to know your concern. It is actually counted by ttm_pages_allocated.
>
> If we use "used" instead of "available" in the GTT manager, the used size may exceed the manager size.

Yes, that is intentional.

> We should also update the GTT manager debug interface accordingly.
>
> Rework the logic like this with your idea:
>
> 	if (atomic64_add_return(mem->num_pages, &mgr->used) > man->size &&
> 	    !(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
> 		atomic64_sub(mem->num_pages, &mgr->used);
> 		return -ENOSPC;
> 	}

Yeah, something like that.

Regards,
Christian.

>
> Regards,
> Lang
>
>>>    		spin_unlock(&mgr->lock);
>>> -		return -ENOSPC;
>>>    	}
>>> -	atomic64_sub(mem->num_pages, &mgr->available);
>>> -	spin_unlock(&mgr->lock);
>>>
>>>    	if (!place->lpfn) {
>>>    		mem->mm_node = NULL;
>>> @@ -178,6 +179,9 @@ static void amdgpu_gtt_mgr_del(struct
>> ttm_resource_manager *man,
>>>    		kfree(node);
>>>    	}
>>>
>>> +	if (mem->placement & TTM_PL_FLAG_TEMPORARY)
>>> +		return;
>>> +
>>>    	atomic64_add(mem->num_pages, &mgr->available);
>>>    }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index c0aef327292a..129d39392859 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -152,9 +152,11 @@ static void amdgpu_evict_flags(struct
>> ttm_buffer_object *bo,
>>>    			abo->placements[0].lpfn = 0;
>>>    			abo->placement.busy_placement = &abo-
>>> placements[1];
>>>    			abo->placement.num_busy_placement = 1;
>>> +			abo->placements[1].flags |=
>> TTM_PL_FLAG_TEMPORARY;
>>>    		} else {
>>>    			/* Move to GTT memory */
>>>    			amdgpu_bo_placement_from_domain(abo,
>> AMDGPU_GEM_DOMAIN_GTT);
>>> +			abo->placements[0].flags |=
>> TTM_PL_FLAG_TEMPORARY;
>>>    		}
>>>    		break;
>>>    	case TTM_PL_TT:
>>> @@ -538,7 +540,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object
>> *bo, bool evict,
>>>    			hop->fpfn = 0;
>>>    			hop->lpfn = 0;
>>>    			hop->mem_type = TTM_PL_TT;
>>> -			hop->flags = 0;
>>> +			hop->flags |= TTM_PL_FLAG_TEMPORARY;
>>>    			return -EMULTIHOP;
>>>    		}
>>>

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 8860545344c7..32fedd495c7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -111,14 +111,15 @@  static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
 	struct amdgpu_gtt_node *node;
 	int r;
 
-	spin_lock(&mgr->lock);
-	if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) &&
-	    atomic64_read(&mgr->available) < mem->num_pages) {
+	if (!(mem->placement & TTM_PL_FLAG_TEMPORARY)) {
+		spin_lock(&mgr->lock);
+		if (atomic64_read(&mgr->available) < mem->num_pages) {
+			spin_unlock(&mgr->lock);
+			return -ENOSPC;
+		}
+		atomic64_sub(mem->num_pages, &mgr->available);
 		spin_unlock(&mgr->lock);
-		return -ENOSPC;
 	}
-	atomic64_sub(mem->num_pages, &mgr->available);
-	spin_unlock(&mgr->lock);
 
 	if (!place->lpfn) {
 		mem->mm_node = NULL;
@@ -178,6 +179,9 @@  static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
 		kfree(node);
 	}
 
+	if (mem->placement & TTM_PL_FLAG_TEMPORARY)
+		return;
+
 	atomic64_add(mem->num_pages, &mgr->available);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c0aef327292a..129d39392859 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -152,9 +152,11 @@  static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 			abo->placements[0].lpfn = 0;
 			abo->placement.busy_placement = &abo->placements[1];
 			abo->placement.num_busy_placement = 1;
+			abo->placements[1].flags |= TTM_PL_FLAG_TEMPORARY;
 		} else {
 			/* Move to GTT memory */
 			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
+			abo->placements[0].flags |= TTM_PL_FLAG_TEMPORARY;
 		}
 		break;
 	case TTM_PL_TT:
@@ -538,7 +540,7 @@  static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			hop->fpfn = 0;
 			hop->lpfn = 0;
 			hop->mem_type = TTM_PL_TT;
-			hop->flags = 0;
+			hop->flags |= TTM_PL_FLAG_TEMPORARY;
 			return -EMULTIHOP;
 		}