drm/ttm/pool: Revert to clear-on-alloc to honor TTM_TT_FLAG_ZERO_ALLOC

Message ID: 20240620160147.29819-1-nirmoy.das@intel.com
State: New, archived
Series: drm/ttm/pool: Revert to clear-on-alloc to honor TTM_TT_FLAG_ZERO_ALLOC

Commit Message

Nirmoy Das June 20, 2024, 4:01 p.m. UTC
Currently the TTM pool does not honor the TTM_TT_FLAG_ZERO_ALLOC flag and
instead clears pages on free. That does help with allocation latency, but
the clearing happens even if the DRM driver doesn't pass the flag. If
clear-on-free is needed, a new flag can be added for that purpose.

Cc: Christian Koenig <christian.koenig@amd.com>
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)
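
For context, a minimal sketch (not part of the patch) of how a driver would
ask for zeroed pages once the pool honors the flag. The function name and
error handling are illustrative; TTM_TT_FLAG_ZERO_ALLOC, ttm_tt_init() and
ttm_cached are existing TTM API:

/*
 * Illustrative sketch, not part of this patch: a driver-side ttm_tt
 * constructor requesting zeroed pages. my_ttm_tt_create() is a
 * hypothetical driver hook.
 */
static struct ttm_tt *my_ttm_tt_create(struct ttm_buffer_object *bo,
				       uint32_t page_flags)
{
	struct ttm_tt *tt;

	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
	if (!tt)
		return NULL;

	/* Ask the pool for cleared pages at allocation time. */
	page_flags |= TTM_TT_FLAG_ZERO_ALLOC;

	if (ttm_tt_init(tt, bo, page_flags, ttm_cached, 0)) {
		kfree(tt);
		return NULL;
	}

	return tt;
}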

Comments

Christian König June 21, 2024, 2:54 p.m. UTC | #1
On 20.06.24 18:01, Nirmoy Das wrote:
> Currently the TTM pool does not honor the TTM_TT_FLAG_ZERO_ALLOC flag and
> instead clears pages on free. That does help with allocation latency, but
> the clearing happens even if the DRM driver doesn't pass the flag. If
> clear-on-free is needed, a new flag can be added for that purpose.

Mhm, thinking more about it, that will most likely get pushback from
others as well.

How about the attached patch? We just skip clearing pages when the
driver sets the ZERO_ALLOC flag again before freeing them.

Maybe rename the flag or add a new one for that, but in general that 
looks like the option with the least impact.
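
A minimal sketch of that idea follows, assuming the clear stays in the give
path and the caller can skip it. The actual attachment is not archived here;
the "already_cleared" parameter is an assumption (it would be derived from
the driver setting TTM_TT_FLAG_ZERO_ALLOC again before freeing):

/*
 * Sketch only, not Christian's actual attachment: clear-on-free is
 * kept, but the caller can skip it when the driver already scrubbed
 * the pages.
 */
static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p,
			       bool already_cleared)
{
	unsigned int i, num_pages = 1 << pt->order;

	if (!already_cleared) {
		for (i = 0; i < num_pages; ++i) {
			if (PageHighMem(p))
				clear_highpage(p + i);
			else
				clear_page(page_address(p + i));
		}
	}

	spin_lock(&pt->lock);
	list_add(&p->lru, &pt->pages);
	spin_unlock(&pt->lock);
	atomic_long_add(1 << pt->order, &allocated_pages);
}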

Regards,
Christian.

> [full patch quoted; see the Patch section below]
Nirmoy Das June 21, 2024, 3:43 p.m. UTC | #2
Hi Christian,

On 6/21/2024 4:54 PM, Christian König wrote:
> On 20.06.24 18:01, Nirmoy Das wrote:
>> Currently the TTM pool does not honor the TTM_TT_FLAG_ZERO_ALLOC flag and
>> instead clears pages on free. That does help with allocation latency, but
>> the clearing happens even if the DRM driver doesn't pass the flag. If
>> clear-on-free is needed, a new flag can be added for that purpose.
>
> Mhm, thinking more about it, that will most likely get pushback from
> others as well.


Agreed, it diverges a lot from the known behavior.

>
> How about the attached patch? We just skip clearing pages when the
> driver sets the ZERO_ALLOC flag again before freeing them.
>
> Maybe rename the flag or add a new one for that, but in general that
> looks like the option with the least impact.


I would prefer a new flag (TTM_TT_FLAG_CLEARED_ALLOC?) which the driver
can set before freeing. I can resend the patch if you are fine with it.
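
A hypothetical sketch of that driver-side usage (the flag name, the function
and my_pages_scrubbed() are assumptions, not merged API):

/*
 * Hypothetical: a driver that already zeroed the pages (e.g. via a
 * GPU blit) marks the tt before the pages go back to the pool, so
 * the pool can skip its CPU clear on the free path.
 */
static void my_driver_release_backing(struct ttm_tt *tt)
{
	if (my_pages_scrubbed(tt))
		tt->page_flags |= TTM_TT_FLAG_CLEARED_ALLOC;

	/* ...followed by the driver's normal unpopulate/destroy path. */
}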


Regards,

Nirmoy


Christian König June 24, 2024, 8:41 a.m. UTC | #3
On 21.06.24 17:43, Nirmoy Das wrote:
> Hi Christian,
>
> On 6/21/2024 4:54 PM, Christian König wrote:
>> On 20.06.24 18:01, Nirmoy Das wrote:
>>> Currently the TTM pool does not honor the TTM_TT_FLAG_ZERO_ALLOC flag and
>>> instead clears pages on free. That does help with allocation latency, but
>>> the clearing happens even if the DRM driver doesn't pass the flag. If
>>> clear-on-free is needed, a new flag can be added for that purpose.
>>
>> Mhm, thinking more about it, that will most likely get pushback from
>> others as well.
>
> Agreed, it diverges a lot from the known behavior.
>
>> How about the attached patch? We just skip clearing pages when the
>> driver sets the ZERO_ALLOC flag again before freeing them.
>>
>> Maybe rename the flag or add a new one for that, but in general that
>> looks like the option with the least impact.
>
> I would prefer a new flag (TTM_TT_FLAG_CLEARED_ALLOC?) which the driver
> can set before freeing. I can resend the patch if you are fine with it.


Yeah, please go ahead with that. But it's probably better to name it
TTM_TT_FLAG_CLEARED_ON_FREE.

Thanks,
Christian.
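
For illustration, a hypothetical sketch of where such a flag could be
defined; the BIT() position and the placement are assumptions, not a merged
definition:

/*
 * Hypothetical: next to the other TTM_TT_FLAG_* definitions in
 * include/drm/ttm/ttm_tt.h. Set by the driver before freeing when
 * the pages are already zeroed, so the pool can skip its CPU clear
 * on the free path. BIT(4) is a placeholder value.
 */
#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(4)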


Patch

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..cbbd722185ee 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -224,15 +224,6 @@  static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
 /* Give pages into a specific pool_type */
 static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
 {
-	unsigned int i, num_pages = 1 << pt->order;
-
-	for (i = 0; i < num_pages; ++i) {
-		if (PageHighMem(p))
-			clear_highpage(p + i);
-		else
-			clear_page(page_address(p + i));
-	}
-
 	spin_lock(&pt->lock);
 	list_add(&p->lru, &pt->pages);
 	spin_unlock(&pt->lock);
@@ -240,15 +231,26 @@  static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
 }
 
 /* Take pages from a specific pool_type, return NULL when nothing available */
-static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
+static struct page *ttm_pool_type_take(struct ttm_pool_type *pt, bool clear)
 {
 	struct page *p;
 
 	spin_lock(&pt->lock);
 	p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
 	if (p) {
+		unsigned int i, num_pages = 1 << pt->order;
+
 		atomic_long_sub(1 << pt->order, &allocated_pages);
 		list_del(&p->lru);
+		if (clear) {
+			for (i = 0; i < num_pages; ++i) {
+				if (PageHighMem(p))
+					clear_highpage(p + i);
+				else
+					clear_page(page_address(p + i));
+			}
+		}
+
 	}
 	spin_unlock(&pt->lock);
 
@@ -279,7 +281,7 @@  static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 	list_del(&pt->shrinker_list);
 	spin_unlock(&shrinker_lock);
 
-	while ((p = ttm_pool_type_take(pt)))
+	while ((p = ttm_pool_type_take(pt, false)))
 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
 }
 
@@ -330,7 +332,7 @@  static unsigned int ttm_pool_shrink(void)
 	list_move_tail(&pt->shrinker_list, &shrinker_list);
 	spin_unlock(&shrinker_lock);
 
-	p = ttm_pool_type_take(pt);
+	p = ttm_pool_type_take(pt, false);
 	if (p) {
 		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
 		num_pages = 1 << pt->order;
@@ -457,10 +459,11 @@  int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
+		bool clear = tt->page_flags & TTM_TT_FLAG_ZERO_ALLOC;
 
 		page_caching = tt->caching;
 		pt = ttm_pool_select_type(pool, tt->caching, order);
-		p = pt ? ttm_pool_type_take(pt) : NULL;
+		p = pt ? ttm_pool_type_take(pt, clear) : NULL;
 		if (p) {
 			r = ttm_pool_apply_caching(caching, pages,
 						   tt->caching);
@@ -480,7 +483,7 @@  int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 				if (num_pages < (1 << order))
 					break;
 
-				p = ttm_pool_type_take(pt);
+				p = ttm_pool_type_take(pt, clear);
 			} while (p);
 		}
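
For reference, an illustrative summary of the pool's behavior once this
patch is applied (not part of the patch itself):

/*
 * Illustrative summary:
 *
 *   ttm_pool_alloc() with TTM_TT_FLAG_ZERO_ALLOC -> pages are cleared
 *       as they are taken from the pool
 *   ttm_pool_alloc() without the flag            -> pages are handed
 *       out as-is, with no clearing cost
 *   shrinker and pool teardown paths             -> take with
 *       clear == false; those pages go back to the kernel, which
 *       zeroes pages itself before any user-visible reuse
 */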