diff mbox series

[v6,1/2] drm/ttm: Add a flag to allow drivers to skip clear-on-free

Message ID 20240816135154.19678-1-nirmoy.das@intel.com (mailing list archive)
State New, archived
Headers show
Series [v6,1/2] drm/ttm: Add a flag to allow drivers to skip clear-on-free | expand

Commit Message

Nirmoy Das Aug. 16, 2024, 1:51 p.m. UTC
Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
releasing backing stores if they want to skip clear-on-free.

Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
 include/drm/ttm/ttm_tt.h       |  6 +++++-
 2 files changed, 16 insertions(+), 8 deletions(-)

Comments

Thomas Hellstrom Aug. 20, 2024, 1:33 p.m. UTC | #1
Hi, Nirmoy, Christian

On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
> releasing backing stores if they want to skip clear-on-free.
> 
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>

What happens if two devices share the same global TTM pool
 type and one that does its own clearing. Wouldn't there be a pretty
high chance that the the device that doesn't clear its own pages
allocate non-cleared memory from the pool?

/Thomas

> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>  include/drm/ttm/ttm_tt.h       |  6 +++++-
>  2 files changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
> b/drivers/gpu/drm/ttm/ttm_pool.c
> index 8504dbe19c1a..935ab3cfd046 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
> *pool, dma_addr_t dma_addr,
>  }
>  
>  /* Give pages into a specific pool_type */
> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page
> *p)
> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page
> *p,
> +			       bool cleared)
>  {
>  	unsigned int i, num_pages = 1 << pt->order;
>  
> -	for (i = 0; i < num_pages; ++i) {
> -		if (PageHighMem(p))
> -			clear_highpage(p + i);
> -		else
> -			clear_page(page_address(p + i));
> +	if (!cleared) {
> +		for (i = 0; i < num_pages; ++i) {
> +			if (PageHighMem(p))
> +				clear_highpage(p + i);
> +			else
> +				clear_page(page_address(p + i));
> +		}
>  	}
>  
>  	spin_lock(&pt->lock);
> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool
> *pool, struct ttm_tt *tt,
>  				pgoff_t start_page, pgoff_t
> end_page)
>  {
>  	struct page **pages = &tt->pages[start_page];
> +	bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE;
>  	unsigned int order;
>  	pgoff_t i, nr;
>  
> @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool
> *pool, struct ttm_tt *tt,
>  
>  		pt = ttm_pool_select_type(pool, caching, order);
>  		if (pt)
> -			ttm_pool_type_give(pt, *pages);
> +			ttm_pool_type_give(pt, *pages, cleared);
>  		else
>  			ttm_pool_free_page(pool, caching, order,
> *pages);
>  	}
> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> index 2b9d856ff388..cfaf49de2419 100644
> --- a/include/drm/ttm/ttm_tt.h
> +++ b/include/drm/ttm/ttm_tt.h
> @@ -85,6 +85,9 @@ struct ttm_tt {
>  	 * fault handling abuses the DMA api a bit and dma_map_attrs
> can't be
>  	 * used to assure pgprot always matches.
>  	 *
> +	 * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
> handles
> +	 * clearing backing store
> +	 *
>  	 * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
> USE. This is
>  	 * set by TTM after ttm_tt_populate() has successfully
> returned, and is
>  	 * then unset when TTM calls ttm_tt_unpopulate().
> @@ -94,8 +97,9 @@ struct ttm_tt {
>  #define TTM_TT_FLAG_EXTERNAL		BIT(2)
>  #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
>  #define TTM_TT_FLAG_DECRYPTED		BIT(4)
> +#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(5)
>  
> -#define TTM_TT_FLAG_PRIV_POPULATED	BIT(5)
> +#define TTM_TT_FLAG_PRIV_POPULATED	BIT(6)
>  	uint32_t page_flags;
>  	/** @num_pages: Number of pages in the page array. */
>  	uint32_t num_pages;
Nirmoy Das Aug. 20, 2024, 2:06 p.m. UTC | #2
On 8/20/2024 3:33 PM, Thomas Hellström wrote:
> Hi, Nirmoy, Christian
>
> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
>> releasing backing stores if they want to skip clear-on-free.
>>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
> What happens if two devices share the same global TTM pool
>   type and one that does its own clearing. Wouldn't there be a pretty
> high chance that the the device that doesn't clear its own pages
> allocate non-cleared memory from the pool?


You are right, mixing such devices will poison the global pool. 
Unfortunately, I fully concentrated on single device

use-case. This is problematic mainly because on XE, we are doing clear 
on alloc so from ttm prospective the flag is correct.


A quick option would be to limit this for non-pooled allocations on XE 
or even create a separate pool for XE when have this 
gpu-system-page-clear is enabled.

Thanks for catching this, I will test above options and send patches to 
fix the xe commit.


Regards,

Nirmoy


>
> /Thomas
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>   include/drm/ttm/ttm_tt.h       |  6 +++++-
>>   2 files changed, 16 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>> b/drivers/gpu/drm/ttm/ttm_pool.c
>> index 8504dbe19c1a..935ab3cfd046 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
>> *pool, dma_addr_t dma_addr,
>>   }
>>   
>>   /* Give pages into a specific pool_type */
>> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page
>> *p)
>> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page
>> *p,
>> +			       bool cleared)
>>   {
>>   	unsigned int i, num_pages = 1 << pt->order;
>>   
>> -	for (i = 0; i < num_pages; ++i) {
>> -		if (PageHighMem(p))
>> -			clear_highpage(p + i);
>> -		else
>> -			clear_page(page_address(p + i));
>> +	if (!cleared) {
>> +		for (i = 0; i < num_pages; ++i) {
>> +			if (PageHighMem(p))
>> +				clear_highpage(p + i);
>> +			else
>> +				clear_page(page_address(p + i));
>> +		}
>>   	}
>>   
>>   	spin_lock(&pt->lock);
>> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool
>> *pool, struct ttm_tt *tt,
>>   				pgoff_t start_page, pgoff_t
>> end_page)
>>   {
>>   	struct page **pages = &tt->pages[start_page];
>> +	bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE;
>>   	unsigned int order;
>>   	pgoff_t i, nr;
>>   
>> @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool
>> *pool, struct ttm_tt *tt,
>>   
>>   		pt = ttm_pool_select_type(pool, caching, order);
>>   		if (pt)
>> -			ttm_pool_type_give(pt, *pages);
>> +			ttm_pool_type_give(pt, *pages, cleared);
>>   		else
>>   			ttm_pool_free_page(pool, caching, order,
>> *pages);
>>   	}
>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>> index 2b9d856ff388..cfaf49de2419 100644
>> --- a/include/drm/ttm/ttm_tt.h
>> +++ b/include/drm/ttm/ttm_tt.h
>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>   	 * fault handling abuses the DMA api a bit and dma_map_attrs
>> can't be
>>   	 * used to assure pgprot always matches.
>>   	 *
>> +	 * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
>> handles
>> +	 * clearing backing store
>> +	 *
>>   	 * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
>> USE. This is
>>   	 * set by TTM after ttm_tt_populate() has successfully
>> returned, and is
>>   	 * then unset when TTM calls ttm_tt_unpopulate().
>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>   #define TTM_TT_FLAG_EXTERNAL		BIT(2)
>>   #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
>>   #define TTM_TT_FLAG_DECRYPTED		BIT(4)
>> +#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(5)
>>   
>> -#define TTM_TT_FLAG_PRIV_POPULATED	BIT(5)
>> +#define TTM_TT_FLAG_PRIV_POPULATED	BIT(6)
>>   	uint32_t page_flags;
>>   	/** @num_pages: Number of pages in the page array. */
>>   	uint32_t num_pages;
Christian König Aug. 20, 2024, 3:30 p.m. UTC | #3
Am 20.08.24 um 15:33 schrieb Thomas Hellström:
> Hi, Nirmoy, Christian
>
> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
>> releasing backing stores if they want to skip clear-on-free.
>>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
> What happens if two devices share the same global TTM pool
>   type and one that does its own clearing. Wouldn't there be a pretty
> high chance that the the device that doesn't clear its own pages
> allocate non-cleared memory from the pool?

That's completely unproblematic. The flag indicates that the released 
pages are already cleared, if that isn't the case then the flag 
shouldn't be set on the TT object.

If one device clear it's pages and another device doesn't clear it's 
pages then we would just clear the pages of the device which doesn't do 
it with a hardware DMA.

Regards,
Christian.

>
> /Thomas
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>   include/drm/ttm/ttm_tt.h       |  6 +++++-
>>   2 files changed, 16 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>> b/drivers/gpu/drm/ttm/ttm_pool.c
>> index 8504dbe19c1a..935ab3cfd046 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
>> *pool, dma_addr_t dma_addr,
>>   }
>>   
>>   /* Give pages into a specific pool_type */
>> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page
>> *p)
>> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page
>> *p,
>> +			       bool cleared)
>>   {
>>   	unsigned int i, num_pages = 1 << pt->order;
>>   
>> -	for (i = 0; i < num_pages; ++i) {
>> -		if (PageHighMem(p))
>> -			clear_highpage(p + i);
>> -		else
>> -			clear_page(page_address(p + i));
>> +	if (!cleared) {
>> +		for (i = 0; i < num_pages; ++i) {
>> +			if (PageHighMem(p))
>> +				clear_highpage(p + i);
>> +			else
>> +				clear_page(page_address(p + i));
>> +		}
>>   	}
>>   
>>   	spin_lock(&pt->lock);
>> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool
>> *pool, struct ttm_tt *tt,
>>   				pgoff_t start_page, pgoff_t
>> end_page)
>>   {
>>   	struct page **pages = &tt->pages[start_page];
>> +	bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE;
>>   	unsigned int order;
>>   	pgoff_t i, nr;
>>   
>> @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool
>> *pool, struct ttm_tt *tt,
>>   
>>   		pt = ttm_pool_select_type(pool, caching, order);
>>   		if (pt)
>> -			ttm_pool_type_give(pt, *pages);
>> +			ttm_pool_type_give(pt, *pages, cleared);
>>   		else
>>   			ttm_pool_free_page(pool, caching, order,
>> *pages);
>>   	}
>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>> index 2b9d856ff388..cfaf49de2419 100644
>> --- a/include/drm/ttm/ttm_tt.h
>> +++ b/include/drm/ttm/ttm_tt.h
>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>   	 * fault handling abuses the DMA api a bit and dma_map_attrs
>> can't be
>>   	 * used to assure pgprot always matches.
>>   	 *
>> +	 * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
>> handles
>> +	 * clearing backing store
>> +	 *
>>   	 * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
>> USE. This is
>>   	 * set by TTM after ttm_tt_populate() has successfully
>> returned, and is
>>   	 * then unset when TTM calls ttm_tt_unpopulate().
>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>   #define TTM_TT_FLAG_EXTERNAL		BIT(2)
>>   #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
>>   #define TTM_TT_FLAG_DECRYPTED		BIT(4)
>> +#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(5)
>>   
>> -#define TTM_TT_FLAG_PRIV_POPULATED	BIT(5)
>> +#define TTM_TT_FLAG_PRIV_POPULATED	BIT(6)
>>   	uint32_t page_flags;
>>   	/** @num_pages: Number of pages in the page array. */
>>   	uint32_t num_pages;
Thomas Hellstrom Aug. 20, 2024, 3:45 p.m. UTC | #4
On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
> Am 20.08.24 um 15:33 schrieb Thomas Hellström:
> > Hi, Nirmoy, Christian
> > 
> > On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
> > > Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
> > > releasing backing stores if they want to skip clear-on-free.
> > > 
> > > Cc: Matthew Auld <matthew.auld@intel.com>
> > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > Suggested-by: Christian König <christian.koenig@amd.com>
> > > Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > What happens if two devices share the same global TTM pool
> >   type and one that does its own clearing. Wouldn't there be a
> > pretty
> > high chance that the the device that doesn't clear its own pages
> > allocate non-cleared memory from the pool?
> 
> That's completely unproblematic. The flag indicates that the released
> pages are already cleared, if that isn't the case then the flag 
> shouldn't be set on the TT object.

Yeah, this patch is OK, but the way the follow-up xe patch uses it is
problematic since, AFAICT, xe dma clears on alloc, meaning the pool
pages are not cleared after use.

/Thomas

> 
> If one device clear it's pages and another device doesn't clear it's 
> pages then we would just clear the pages of the device which doesn't
> do 
> it with a hardware DMA.
> 
> Regards,
> Christian.
> 
> > 
> > /Thomas
> > 
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
> > >   include/drm/ttm/ttm_tt.h       |  6 +++++-
> > >   2 files changed, 16 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
> > > b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index 8504dbe19c1a..935ab3cfd046 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
> > > *pool, dma_addr_t dma_addr,
> > >   }
> > >   
> > >   /* Give pages into a specific pool_type */
> > > -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
> > > page
> > > *p)
> > > +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
> > > page
> > > *p,
> > > +			       bool cleared)
> > >   {
> > >   	unsigned int i, num_pages = 1 << pt->order;
> > >   
> > > -	for (i = 0; i < num_pages; ++i) {
> > > -		if (PageHighMem(p))
> > > -			clear_highpage(p + i);
> > > -		else
> > > -			clear_page(page_address(p + i));
> > > +	if (!cleared) {
> > > +		for (i = 0; i < num_pages; ++i) {
> > > +			if (PageHighMem(p))
> > > +				clear_highpage(p + i);
> > > +			else
> > > +				clear_page(page_address(p + i));
> > > +		}
> > >   	}
> > >   
> > >   	spin_lock(&pt->lock);
> > > @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct
> > > ttm_pool
> > > *pool, struct ttm_tt *tt,
> > >   				pgoff_t start_page, pgoff_t
> > > end_page)
> > >   {
> > >   	struct page **pages = &tt->pages[start_page];
> > > +	bool cleared = tt->page_flags &
> > > TTM_TT_FLAG_CLEARED_ON_FREE;
> > >   	unsigned int order;
> > >   	pgoff_t i, nr;
> > >   
> > > @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct
> > > ttm_pool
> > > *pool, struct ttm_tt *tt,
> > >   
> > >   		pt = ttm_pool_select_type(pool, caching, order);
> > >   		if (pt)
> > > -			ttm_pool_type_give(pt, *pages);
> > > +			ttm_pool_type_give(pt, *pages, cleared);
> > >   		else
> > >   			ttm_pool_free_page(pool, caching, order,
> > > *pages);
> > >   	}
> > > diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
> > > index 2b9d856ff388..cfaf49de2419 100644
> > > --- a/include/drm/ttm/ttm_tt.h
> > > +++ b/include/drm/ttm/ttm_tt.h
> > > @@ -85,6 +85,9 @@ struct ttm_tt {
> > >   	 * fault handling abuses the DMA api a bit and
> > > dma_map_attrs
> > > can't be
> > >   	 * used to assure pgprot always matches.
> > >   	 *
> > > +	 * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
> > > handles
> > > +	 * clearing backing store
> > > +	 *
> > >   	 * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
> > > USE. This is
> > >   	 * set by TTM after ttm_tt_populate() has successfully
> > > returned, and is
> > >   	 * then unset when TTM calls ttm_tt_unpopulate().
> > > @@ -94,8 +97,9 @@ struct ttm_tt {
> > >   #define TTM_TT_FLAG_EXTERNAL		BIT(2)
> > >   #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
> > >   #define TTM_TT_FLAG_DECRYPTED		BIT(4)
> > > +#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(5)
> > >   
> > > -#define TTM_TT_FLAG_PRIV_POPULATED	BIT(5)
> > > +#define TTM_TT_FLAG_PRIV_POPULATED	BIT(6)
> > >   	uint32_t page_flags;
> > >   	/** @num_pages: Number of pages in the page array. */
> > >   	uint32_t num_pages;
>
Christian König Aug. 20, 2024, 3:47 p.m. UTC | #5
Am 20.08.24 um 17:45 schrieb Thomas Hellström:
> On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
>> Am 20.08.24 um 15:33 schrieb Thomas Hellström:
>>> Hi, Nirmoy, Christian
>>>
>>> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>>>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
>>>> releasing backing stores if they want to skip clear-on-free.
>>>>
>>>> Cc: Matthew Auld <matthew.auld@intel.com>
>>>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>> What happens if two devices share the same global TTM pool
>>>    type and one that does its own clearing. Wouldn't there be a
>>> pretty
>>> high chance that the the device that doesn't clear its own pages
>>> allocate non-cleared memory from the pool?
>> That's completely unproblematic. The flag indicates that the released
>> pages are already cleared, if that isn't the case then the flag
>> shouldn't be set on the TT object.
> Yeah, this patch is OK, but the way the follow-up xe patch uses it is
> problematic since, AFAICT, xe dma clears on alloc, meaning the pool
> pages are not cleared after use.

Yeah that is clearly invalid behavior.

Regards,
Christian.

>
> /Thomas
>
>> If one device clear it's pages and another device doesn't clear it's
>> pages then we would just clear the pages of the device which doesn't
>> do
>> it with a hardware DMA.
>>
>> Regards,
>> Christian.
>>
>>> /Thomas
>>>
>>>> ---
>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>>>    include/drm/ttm/ttm_tt.h       |  6 +++++-
>>>>    2 files changed, 16 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>> index 8504dbe19c1a..935ab3cfd046 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
>>>> *pool, dma_addr_t dma_addr,
>>>>    }
>>>>    
>>>>    /* Give pages into a specific pool_type */
>>>> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>> page
>>>> *p)
>>>> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>> page
>>>> *p,
>>>> +			       bool cleared)
>>>>    {
>>>>    	unsigned int i, num_pages = 1 << pt->order;
>>>>    
>>>> -	for (i = 0; i < num_pages; ++i) {
>>>> -		if (PageHighMem(p))
>>>> -			clear_highpage(p + i);
>>>> -		else
>>>> -			clear_page(page_address(p + i));
>>>> +	if (!cleared) {
>>>> +		for (i = 0; i < num_pages; ++i) {
>>>> +			if (PageHighMem(p))
>>>> +				clear_highpage(p + i);
>>>> +			else
>>>> +				clear_page(page_address(p + i));
>>>> +		}
>>>>    	}
>>>>    
>>>>    	spin_lock(&pt->lock);
>>>> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct
>>>> ttm_pool
>>>> *pool, struct ttm_tt *tt,
>>>>    				pgoff_t start_page, pgoff_t
>>>> end_page)
>>>>    {
>>>>    	struct page **pages = &tt->pages[start_page];
>>>> +	bool cleared = tt->page_flags &
>>>> TTM_TT_FLAG_CLEARED_ON_FREE;
>>>>    	unsigned int order;
>>>>    	pgoff_t i, nr;
>>>>    
>>>> @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct
>>>> ttm_pool
>>>> *pool, struct ttm_tt *tt,
>>>>    
>>>>    		pt = ttm_pool_select_type(pool, caching, order);
>>>>    		if (pt)
>>>> -			ttm_pool_type_give(pt, *pages);
>>>> +			ttm_pool_type_give(pt, *pages, cleared);
>>>>    		else
>>>>    			ttm_pool_free_page(pool, caching, order,
>>>> *pages);
>>>>    	}
>>>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>>>> index 2b9d856ff388..cfaf49de2419 100644
>>>> --- a/include/drm/ttm/ttm_tt.h
>>>> +++ b/include/drm/ttm/ttm_tt.h
>>>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>>>    	 * fault handling abuses the DMA api a bit and
>>>> dma_map_attrs
>>>> can't be
>>>>    	 * used to assure pgprot always matches.
>>>>    	 *
>>>> +	 * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
>>>> handles
>>>> +	 * clearing backing store
>>>> +	 *
>>>>    	 * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
>>>> USE. This is
>>>>    	 * set by TTM after ttm_tt_populate() has successfully
>>>> returned, and is
>>>>    	 * then unset when TTM calls ttm_tt_unpopulate().
>>>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>>>    #define TTM_TT_FLAG_EXTERNAL		BIT(2)
>>>>    #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
>>>>    #define TTM_TT_FLAG_DECRYPTED		BIT(4)
>>>> +#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(5)
>>>>    
>>>> -#define TTM_TT_FLAG_PRIV_POPULATED	BIT(5)
>>>> +#define TTM_TT_FLAG_PRIV_POPULATED	BIT(6)
>>>>    	uint32_t page_flags;
>>>>    	/** @num_pages: Number of pages in the page array. */
>>>>    	uint32_t num_pages;
Nirmoy Das Aug. 20, 2024, 4:46 p.m. UTC | #6
Hi Thomas, Christian,

On 8/20/2024 5:47 PM, Christian König wrote:
> Am 20.08.24 um 17:45 schrieb Thomas Hellström:
>> On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
>>> Am 20.08.24 um 15:33 schrieb Thomas Hellström:
>>>> Hi, Nirmoy, Christian
>>>>
>>>> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>>>>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
>>>>> releasing backing stores if they want to skip clear-on-free.
>>>>>
>>>>> Cc: Matthew Auld <matthew.auld@intel.com>
>>>>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>> What happens if two devices share the same global TTM pool
>>>>    type and one that does its own clearing. Wouldn't there be a
>>>> pretty
>>>> high chance that the the device that doesn't clear its own pages
>>>> allocate non-cleared memory from the pool?
>>> That's completely unproblematic. The flag indicates that the released
>>> pages are already cleared, if that isn't the case then the flag
>>> shouldn't be set on the TT object.
>> Yeah, this patch is OK, but the way the follow-up xe patch uses it is
>> problematic since, AFAICT, xe dma clears on alloc, meaning the pool
>> pages are not cleared after use.
>
> Yeah that is clearly invalid behavior.


I was only thinking about one device use-case which won't leak any data 
though I am now miss-using the flag.

If I skip dma clear for pooled BO then this flag  is not really needed. 
Shall I revert the this and usage of TTM_TT_FLAG_CLEARED_ON_FREE

and re-introduce it after I get a working clear on free implementation 
for XE?


Regards,

Nirmoy


>
> Regards,
> Christian.
>
>>
>> /Thomas
>>
>>> If one device clear it's pages and another device doesn't clear it's
>>> pages then we would just clear the pages of the device which doesn't
>>> do
>>> it with a hardware DMA.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> /Thomas
>>>>
>>>>> ---
>>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>>>>    include/drm/ttm/ttm_tt.h       |  6 +++++-
>>>>>    2 files changed, 16 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> index 8504dbe19c1a..935ab3cfd046 100644
>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
>>>>> *pool, dma_addr_t dma_addr,
>>>>>    }
>>>>>       /* Give pages into a specific pool_type */
>>>>> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>>> page
>>>>> *p)
>>>>> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>>> page
>>>>> *p,
>>>>> +                   bool cleared)
>>>>>    {
>>>>>        unsigned int i, num_pages = 1 << pt->order;
>>>>>    -    for (i = 0; i < num_pages; ++i) {
>>>>> -        if (PageHighMem(p))
>>>>> -            clear_highpage(p + i);
>>>>> -        else
>>>>> -            clear_page(page_address(p + i));
>>>>> +    if (!cleared) {
>>>>> +        for (i = 0; i < num_pages; ++i) {
>>>>> +            if (PageHighMem(p))
>>>>> +                clear_highpage(p + i);
>>>>> +            else
>>>>> +                clear_page(page_address(p + i));
>>>>> +        }
>>>>>        }
>>>>>           spin_lock(&pt->lock);
>>>>> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct
>>>>> ttm_pool
>>>>> *pool, struct ttm_tt *tt,
>>>>>                    pgoff_t start_page, pgoff_t
>>>>> end_page)
>>>>>    {
>>>>>        struct page **pages = &tt->pages[start_page];
>>>>> +    bool cleared = tt->page_flags &
>>>>> TTM_TT_FLAG_CLEARED_ON_FREE;
>>>>>        unsigned int order;
>>>>>        pgoff_t i, nr;
>>>>>    @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct
>>>>> ttm_pool
>>>>> *pool, struct ttm_tt *tt,
>>>>>               pt = ttm_pool_select_type(pool, caching, order);
>>>>>            if (pt)
>>>>> -            ttm_pool_type_give(pt, *pages);
>>>>> +            ttm_pool_type_give(pt, *pages, cleared);
>>>>>            else
>>>>>                ttm_pool_free_page(pool, caching, order,
>>>>> *pages);
>>>>>        }
>>>>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>>>>> index 2b9d856ff388..cfaf49de2419 100644
>>>>> --- a/include/drm/ttm/ttm_tt.h
>>>>> +++ b/include/drm/ttm/ttm_tt.h
>>>>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>>>>         * fault handling abuses the DMA api a bit and
>>>>> dma_map_attrs
>>>>> can't be
>>>>>         * used to assure pgprot always matches.
>>>>>         *
>>>>> +     * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
>>>>> handles
>>>>> +     * clearing backing store
>>>>> +     *
>>>>>         * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
>>>>> USE. This is
>>>>>         * set by TTM after ttm_tt_populate() has successfully
>>>>> returned, and is
>>>>>         * then unset when TTM calls ttm_tt_unpopulate().
>>>>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>>>>    #define TTM_TT_FLAG_EXTERNAL        BIT(2)
>>>>>    #define TTM_TT_FLAG_EXTERNAL_MAPPABLE    BIT(3)
>>>>>    #define TTM_TT_FLAG_DECRYPTED        BIT(4)
>>>>> +#define TTM_TT_FLAG_CLEARED_ON_FREE    BIT(5)
>>>>>    -#define TTM_TT_FLAG_PRIV_POPULATED    BIT(5)
>>>>> +#define TTM_TT_FLAG_PRIV_POPULATED    BIT(6)
>>>>>        uint32_t page_flags;
>>>>>        /** @num_pages: Number of pages in the page array. */
>>>>>        uint32_t num_pages;
>
Christian König Aug. 21, 2024, 7:47 a.m. UTC | #7
Am 20.08.24 um 18:46 schrieb Nirmoy Das:
> Hi Thomas, Christian,
>
> On 8/20/2024 5:47 PM, Christian König wrote:
>> Am 20.08.24 um 17:45 schrieb Thomas Hellström:
>>> On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
>>>> Am 20.08.24 um 15:33 schrieb Thomas Hellström:
>>>>> Hi, Nirmoy, Christian
>>>>>
>>>>> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>>>>>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before
>>>>>> releasing backing stores if they want to skip clear-on-free.
>>>>>>
>>>>>> Cc: Matthew Auld <matthew.auld@intel.com>
>>>>>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>>> Suggested-by: Christian König <christian.koenig@amd.com>
>>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>> What happens if two devices share the same global TTM pool
>>>>>    type and one that does its own clearing. Wouldn't there be a
>>>>> pretty
>>>>> high chance that the the device that doesn't clear its own pages
>>>>> allocate non-cleared memory from the pool?
>>>> That's completely unproblematic. The flag indicates that the released
>>>> pages are already cleared, if that isn't the case then the flag
>>>> shouldn't be set on the TT object.
>>> Yeah, this patch is OK, but the way the follow-up xe patch uses it is
>>> problematic since, AFAICT, xe dma clears on alloc, meaning the pool
>>> pages are not cleared after use.
>>
>> Yeah that is clearly invalid behavior.
>
>
> I was only thinking about one device use-case which won't leak any 
> data though I am now miss-using the flag.
>
> If I skip dma clear for pooled BO then this flag  is not really 
> needed. Shall I revert the this and usage of TTM_TT_FLAG_CLEARED_ON_FREE
>
> and re-introduce it after I get a working clear on free implementation 
> for XE?

Yes absolutely.

I though that I made it clear that the handling should be that the 
driver clears the pages and *then* sets the TTM_TT_FLAG_CLEARED_ON_FREE 
flag.

So if you don't have the handling implemented like that then that's 
clearly invalid behavior.

Regards,
Christian.

>
>
> Regards,
>
> Nirmoy
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> /Thomas
>>>
>>>> If one device clear it's pages and another device doesn't clear it's
>>>> pages then we would just clear the pages of the device which doesn't
>>>> do
>>>> it with a hardware DMA.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>>>>>> ---
>>>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>>>>>    include/drm/ttm/ttm_tt.h       |  6 +++++-
>>>>>>    2 files changed, 16 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> index 8504dbe19c1a..935ab3cfd046 100644
>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool
>>>>>> *pool, dma_addr_t dma_addr,
>>>>>>    }
>>>>>>       /* Give pages into a specific pool_type */
>>>>>> -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>>>> page
>>>>>> *p)
>>>>>> +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct
>>>>>> page
>>>>>> *p,
>>>>>> +                   bool cleared)
>>>>>>    {
>>>>>>        unsigned int i, num_pages = 1 << pt->order;
>>>>>>    -    for (i = 0; i < num_pages; ++i) {
>>>>>> -        if (PageHighMem(p))
>>>>>> -            clear_highpage(p + i);
>>>>>> -        else
>>>>>> -            clear_page(page_address(p + i));
>>>>>> +    if (!cleared) {
>>>>>> +        for (i = 0; i < num_pages; ++i) {
>>>>>> +            if (PageHighMem(p))
>>>>>> +                clear_highpage(p + i);
>>>>>> +            else
>>>>>> +                clear_page(page_address(p + i));
>>>>>> +        }
>>>>>>        }
>>>>>>           spin_lock(&pt->lock);
>>>>>> @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct
>>>>>> ttm_pool
>>>>>> *pool, struct ttm_tt *tt,
>>>>>>                    pgoff_t start_page, pgoff_t
>>>>>> end_page)
>>>>>>    {
>>>>>>        struct page **pages = &tt->pages[start_page];
>>>>>> +    bool cleared = tt->page_flags &
>>>>>> TTM_TT_FLAG_CLEARED_ON_FREE;
>>>>>>        unsigned int order;
>>>>>>        pgoff_t i, nr;
>>>>>>    @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct
>>>>>> ttm_pool
>>>>>> *pool, struct ttm_tt *tt,
>>>>>>               pt = ttm_pool_select_type(pool, caching, order);
>>>>>>            if (pt)
>>>>>> -            ttm_pool_type_give(pt, *pages);
>>>>>> +            ttm_pool_type_give(pt, *pages, cleared);
>>>>>>            else
>>>>>>                ttm_pool_free_page(pool, caching, order,
>>>>>> *pages);
>>>>>>        }
>>>>>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>>>>>> index 2b9d856ff388..cfaf49de2419 100644
>>>>>> --- a/include/drm/ttm/ttm_tt.h
>>>>>> +++ b/include/drm/ttm/ttm_tt.h
>>>>>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>>>>>         * fault handling abuses the DMA api a bit and
>>>>>> dma_map_attrs
>>>>>> can't be
>>>>>>         * used to assure pgprot always matches.
>>>>>>         *
>>>>>> +     * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver
>>>>>> handles
>>>>>> +     * clearing backing store
>>>>>> +     *
>>>>>>         * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT
>>>>>> USE. This is
>>>>>>         * set by TTM after ttm_tt_populate() has successfully
>>>>>> returned, and is
>>>>>>         * then unset when TTM calls ttm_tt_unpopulate().
>>>>>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>>>>>    #define TTM_TT_FLAG_EXTERNAL        BIT(2)
>>>>>>    #define TTM_TT_FLAG_EXTERNAL_MAPPABLE    BIT(3)
>>>>>>    #define TTM_TT_FLAG_DECRYPTED        BIT(4)
>>>>>> +#define TTM_TT_FLAG_CLEARED_ON_FREE    BIT(5)
>>>>>>    -#define TTM_TT_FLAG_PRIV_POPULATED    BIT(5)
>>>>>> +#define TTM_TT_FLAG_PRIV_POPULATED    BIT(6)
>>>>>>        uint32_t page_flags;
>>>>>>        /** @num_pages: Number of pages in the page array. */
>>>>>>        uint32_t num_pages;
>>
Thomas Hellstrom Aug. 21, 2024, 8:08 a.m. UTC | #8
On Wed, 2024-08-21 at 09:47 +0200, Christian König wrote:
> Am 20.08.24 um 18:46 schrieb Nirmoy Das:
> > Hi Thomas, Christian,
> > 
> > On 8/20/2024 5:47 PM, Christian König wrote:
> > > Am 20.08.24 um 17:45 schrieb Thomas Hellström:
> > > > On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
> > > > > Am 20.08.24 um 15:33 schrieb Thomas Hellström:
> > > > > > Hi, Nirmoy, Christian
> > > > > > 
> > > > > > On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
> > > > > > > Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can
> > > > > > > set before
> > > > > > > releasing backing stores if they want to skip clear-on-
> > > > > > > free.
> > > > > > > 
> > > > > > > Cc: Matthew Auld <matthew.auld@intel.com>
> > > > > > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > > > > > Suggested-by: Christian König <christian.koenig@amd.com>
> > > > > > > Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> > > > > > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > > > > > What happens if two devices share the same global TTM pool
> > > > > >    type and one that does its own clearing. Wouldn't there
> > > > > > be a
> > > > > > pretty
> > > > > > high chance that the the device that doesn't clear its own
> > > > > > pages
> > > > > > allocate non-cleared memory from the pool?
> > > > > That's completely unproblematic. The flag indicates that the
> > > > > released
> > > > > pages are already cleared, if that isn't the case then the
> > > > > flag
> > > > > shouldn't be set on the TT object.
> > > > Yeah, this patch is OK, but the way the follow-up xe patch uses
> > > > it is
> > > > problematic since, AFAICT, xe dma clears on alloc, meaning the
> > > > pool
> > > > pages are not cleared after use.
> > > 
> > > Yeah that is clearly invalid behavior.
> > 
> > 
> > I was only thinking about one device use-case which won't leak any 
> > data though I am now miss-using the flag.
> > 
> > If I skip dma clear for pooled BO then this flag  is not really 
> > needed. Shall I revert the this and usage of
> > TTM_TT_FLAG_CLEARED_ON_FREE
> > 
> > and re-introduce it after I get a working clear on free
> > implementation 
> > for XE?
> 
> Yes absolutely.
> 
> I though that I made it clear that the handling should be that the 
> driver clears the pages and *then* sets the
> TTM_TT_FLAG_CLEARED_ON_FREE 
> flag.
> 
> So if you don't have the handling implemented like that then that's 
> clearly invalid behavior.
> 
> Regards,
> Christian.

I agree.
Revert and re-introduce as needed, and obtain an ack from Christian to
merge through drm-xe-next before re-introduction so that it doesn't
clash with anything planned elsewhere.

Thanks,
Thomas



> 
> > 
> > 
> > Regards,
> > 
> > Nirmoy
> > 
> > 
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > 
> > > > /Thomas
> > > > 
> > > > > If one device clear it's pages and another device doesn't
> > > > > clear it's
> > > > > pages then we would just clear the pages of the device which
> > > > > doesn't
> > > > > do
> > > > > it with a hardware DMA.
> > > > > 
> > > > > Regards,
> > > > > Christian.
> > > > > 
> > > > > > /Thomas
> > > > > > 
> > > > > > > ---
> > > > > > >    drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
> > > > > > >    include/drm/ttm/ttm_tt.h       |  6 +++++-
> > > > > > >    2 files changed, 16 insertions(+), 8 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > > > b/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > > > index 8504dbe19c1a..935ab3cfd046 100644
> > > > > > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > > > > > @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct
> > > > > > > ttm_pool
> > > > > > > *pool, dma_addr_t dma_addr,
> > > > > > >    }
> > > > > > >       /* Give pages into a specific pool_type */
> > > > > > > -static void ttm_pool_type_give(struct ttm_pool_type *pt,
> > > > > > > struct
> > > > > > > page
> > > > > > > *p)
> > > > > > > +static void ttm_pool_type_give(struct ttm_pool_type *pt,
> > > > > > > struct
> > > > > > > page
> > > > > > > *p,
> > > > > > > +                   bool cleared)
> > > > > > >    {
> > > > > > >        unsigned int i, num_pages = 1 << pt->order;
> > > > > > >    -    for (i = 0; i < num_pages; ++i) {
> > > > > > > -        if (PageHighMem(p))
> > > > > > > -            clear_highpage(p + i);
> > > > > > > -        else
> > > > > > > -            clear_page(page_address(p + i));
> > > > > > > +    if (!cleared) {
> > > > > > > +        for (i = 0; i < num_pages; ++i) {
> > > > > > > +            if (PageHighMem(p))
> > > > > > > +                clear_highpage(p + i);
> > > > > > > +            else
> > > > > > > +                clear_page(page_address(p + i));
> > > > > > > +        }
> > > > > > >        }
> > > > > > >           spin_lock(&pt->lock);
> > > > > > > @@ -394,6 +397,7 @@ static void
> > > > > > > ttm_pool_free_range(struct
> > > > > > > ttm_pool
> > > > > > > *pool, struct ttm_tt *tt,
> > > > > > >                    pgoff_t start_page, pgoff_t
> > > > > > > end_page)
> > > > > > >    {
> > > > > > >        struct page **pages = &tt->pages[start_page];
> > > > > > > +    bool cleared = tt->page_flags &
> > > > > > > TTM_TT_FLAG_CLEARED_ON_FREE;
> > > > > > >        unsigned int order;
> > > > > > >        pgoff_t i, nr;
> > > > > > >    @@ -407,7 +411,7 @@ static void
> > > > > > > ttm_pool_free_range(struct
> > > > > > > ttm_pool
> > > > > > > *pool, struct ttm_tt *tt,
> > > > > > >               pt = ttm_pool_select_type(pool, caching,
> > > > > > > order);
> > > > > > >            if (pt)
> > > > > > > -            ttm_pool_type_give(pt, *pages);
> > > > > > > +            ttm_pool_type_give(pt, *pages, cleared);
> > > > > > >            else
> > > > > > >                ttm_pool_free_page(pool, caching, order,
> > > > > > > *pages);
> > > > > > >        }
> > > > > > > diff --git a/include/drm/ttm/ttm_tt.h
> > > > > > > b/include/drm/ttm/ttm_tt.h
> > > > > > > index 2b9d856ff388..cfaf49de2419 100644
> > > > > > > --- a/include/drm/ttm/ttm_tt.h
> > > > > > > +++ b/include/drm/ttm/ttm_tt.h
> > > > > > > @@ -85,6 +85,9 @@ struct ttm_tt {
> > > > > > >         * fault handling abuses the DMA api a bit and
> > > > > > > dma_map_attrs
> > > > > > > can't be
> > > > > > >         * used to assure pgprot always matches.
> > > > > > >         *
> > > > > > > +     * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm
> > > > > > > driver
> > > > > > > handles
> > > > > > > +     * clearing backing store
> > > > > > > +     *
> > > > > > >         * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only.
> > > > > > > DO NOT
> > > > > > > USE. This is
> > > > > > >         * set by TTM after ttm_tt_populate() has
> > > > > > > successfully
> > > > > > > returned, and is
> > > > > > >         * then unset when TTM calls ttm_tt_unpopulate().
> > > > > > > @@ -94,8 +97,9 @@ struct ttm_tt {
> > > > > > >    #define TTM_TT_FLAG_EXTERNAL        BIT(2)
> > > > > > >    #define TTM_TT_FLAG_EXTERNAL_MAPPABLE    BIT(3)
> > > > > > >    #define TTM_TT_FLAG_DECRYPTED        BIT(4)
> > > > > > > +#define TTM_TT_FLAG_CLEARED_ON_FREE    BIT(5)
> > > > > > >    -#define TTM_TT_FLAG_PRIV_POPULATED    BIT(5)
> > > > > > > +#define TTM_TT_FLAG_PRIV_POPULATED    BIT(6)
> > > > > > >        uint32_t page_flags;
> > > > > > >        /** @num_pages: Number of pages in the page array.
> > > > > > > */
> > > > > > >        uint32_t num_pages;
> > > 
>
Nirmoy Das Aug. 21, 2024, 10:22 a.m. UTC | #9
On 8/21/2024 10:08 AM, Thomas Hellström wrote:
> On Wed, 2024-08-21 at 09:47 +0200, Christian König wrote:
>> Am 20.08.24 um 18:46 schrieb Nirmoy Das:
>>> Hi Thomas, Christian,
>>>
>>> On 8/20/2024 5:47 PM, Christian König wrote:
>>>> Am 20.08.24 um 17:45 schrieb Thomas Hellström:
>>>>> On Tue, 2024-08-20 at 17:30 +0200, Christian König wrote:
>>>>>> Am 20.08.24 um 15:33 schrieb Thomas Hellström:
>>>>>>> Hi, Nirmoy, Christian
>>>>>>>
>>>>>>> On Fri, 2024-08-16 at 15:51 +0200, Nirmoy Das wrote:
>>>>>>>> Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can
>>>>>>>> set before
>>>>>>>> releasing backing stores if they want to skip clear-on-
>>>>>>>> free.
>>>>>>>>
>>>>>>>> Cc: Matthew Auld<matthew.auld@intel.com>
>>>>>>>> Cc: Thomas Hellström<thomas.hellstrom@linux.intel.com>
>>>>>>>> Suggested-by: Christian König<christian.koenig@amd.com>
>>>>>>>> Signed-off-by: Nirmoy Das<nirmoy.das@intel.com>
>>>>>>>> Reviewed-by: Christian König<christian.koenig@amd.com>
>>>>>>> What happens if two devices share the same global TTM pool
>>>>>>>     type and one that does its own clearing. Wouldn't there
>>>>>>> be a
>>>>>>> pretty
>>>>>>> high chance that the the device that doesn't clear its own
>>>>>>> pages
>>>>>>> allocate non-cleared memory from the pool?
>>>>>> That's completely unproblematic. The flag indicates that the
>>>>>> released
>>>>>> pages are already cleared, if that isn't the case then the
>>>>>> flag
>>>>>> shouldn't be set on the TT object.
>>>>> Yeah, this patch is OK, but the way the follow-up xe patch uses
>>>>> it is
>>>>> problematic since, AFAICT, xe dma clears on alloc, meaning the
>>>>> pool
>>>>> pages are not cleared after use.
>>>> Yeah that is clearly invalid behavior.
>>>
>>> I was only thinking about one device use-case which won't leak any
>>> data though I am now miss-using the flag.
>>>
>>> If I skip dma clear for pooled BO then this flag  is not really
>>> needed. Shall I revert the this and usage of
>>> TTM_TT_FLAG_CLEARED_ON_FREE
>>>
>>> and re-introduce it after I get a working clear on free
>>> implementation
>>> for XE?
>> Yes absolutely.
>>
>> I though that I made it clear that the handling should be that the
>> driver clears the pages and *then* sets the
>> TTM_TT_FLAG_CLEARED_ON_FREE
>> flag.
>>
>> So if you don't have the handling implemented like that then that's
>> clearly invalid behavior.
>>
>> Regards,
>> Christian.
> I agree.
> Revert and re-introduce as needed, and obtain an ack from Christian to
> merge through drm-xe-next before re-introduction so that it doesn't
> clash with anything planned elsewhere.


Sent a series to revert the usages TTM_TT_FLAG_CLEARED_ON_FREE.

Thanks both of you for your time and patience,

Nirmoy

>
> Thanks,
> Thomas
>
>
>
>>>
>>> Regards,
>>>
>>> Nirmoy
>>>
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>>>>>> If one device clear it's pages and another device doesn't
>>>>>> clear it's
>>>>>> pages then we would just clear the pages of the device which
>>>>>> doesn't
>>>>>> do
>>>>>> it with a hardware DMA.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> /Thomas
>>>>>>>
>>>>>>>> ---
>>>>>>>>     drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++-------
>>>>>>>>     include/drm/ttm/ttm_tt.h       |  6 +++++-
>>>>>>>>     2 files changed, 16 insertions(+), 8 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>> index 8504dbe19c1a..935ab3cfd046 100644
>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>> @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct
>>>>>>>> ttm_pool
>>>>>>>> *pool, dma_addr_t dma_addr,
>>>>>>>>     }
>>>>>>>>        /* Give pages into a specific pool_type */
>>>>>>>> -static void ttm_pool_type_give(struct ttm_pool_type *pt,
>>>>>>>> struct
>>>>>>>> page
>>>>>>>> *p)
>>>>>>>> +static void ttm_pool_type_give(struct ttm_pool_type *pt,
>>>>>>>> struct
>>>>>>>> page
>>>>>>>> *p,
>>>>>>>> +                   bool cleared)
>>>>>>>>     {
>>>>>>>>         unsigned int i, num_pages = 1 << pt->order;
>>>>>>>>     -    for (i = 0; i < num_pages; ++i) {
>>>>>>>> -        if (PageHighMem(p))
>>>>>>>> -            clear_highpage(p + i);
>>>>>>>> -        else
>>>>>>>> -            clear_page(page_address(p + i));
>>>>>>>> +    if (!cleared) {
>>>>>>>> +        for (i = 0; i < num_pages; ++i) {
>>>>>>>> +            if (PageHighMem(p))
>>>>>>>> +                clear_highpage(p + i);
>>>>>>>> +            else
>>>>>>>> +                clear_page(page_address(p + i));
>>>>>>>> +        }
>>>>>>>>         }
>>>>>>>>            spin_lock(&pt->lock);
>>>>>>>> @@ -394,6 +397,7 @@ static void
>>>>>>>> ttm_pool_free_range(struct
>>>>>>>> ttm_pool
>>>>>>>> *pool, struct ttm_tt *tt,
>>>>>>>>                     pgoff_t start_page, pgoff_t
>>>>>>>> end_page)
>>>>>>>>     {
>>>>>>>>         struct page **pages = &tt->pages[start_page];
>>>>>>>> +    bool cleared = tt->page_flags &
>>>>>>>> TTM_TT_FLAG_CLEARED_ON_FREE;
>>>>>>>>         unsigned int order;
>>>>>>>>         pgoff_t i, nr;
>>>>>>>>     @@ -407,7 +411,7 @@ static void
>>>>>>>> ttm_pool_free_range(struct
>>>>>>>> ttm_pool
>>>>>>>> *pool, struct ttm_tt *tt,
>>>>>>>>                pt = ttm_pool_select_type(pool, caching,
>>>>>>>> order);
>>>>>>>>             if (pt)
>>>>>>>> -            ttm_pool_type_give(pt, *pages);
>>>>>>>> +            ttm_pool_type_give(pt, *pages, cleared);
>>>>>>>>             else
>>>>>>>>                 ttm_pool_free_page(pool, caching, order,
>>>>>>>> *pages);
>>>>>>>>         }
>>>>>>>> diff --git a/include/drm/ttm/ttm_tt.h
>>>>>>>> b/include/drm/ttm/ttm_tt.h
>>>>>>>> index 2b9d856ff388..cfaf49de2419 100644
>>>>>>>> --- a/include/drm/ttm/ttm_tt.h
>>>>>>>> +++ b/include/drm/ttm/ttm_tt.h
>>>>>>>> @@ -85,6 +85,9 @@ struct ttm_tt {
>>>>>>>>          * fault handling abuses the DMA api a bit and
>>>>>>>> dma_map_attrs
>>>>>>>> can't be
>>>>>>>>          * used to assure pgprot always matches.
>>>>>>>>          *
>>>>>>>> +     * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm
>>>>>>>> driver
>>>>>>>> handles
>>>>>>>> +     * clearing backing store
>>>>>>>> +     *
>>>>>>>>          * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only.
>>>>>>>> DO NOT
>>>>>>>> USE. This is
>>>>>>>>          * set by TTM after ttm_tt_populate() has
>>>>>>>> successfully
>>>>>>>> returned, and is
>>>>>>>>          * then unset when TTM calls ttm_tt_unpopulate().
>>>>>>>> @@ -94,8 +97,9 @@ struct ttm_tt {
>>>>>>>>     #define TTM_TT_FLAG_EXTERNAL        BIT(2)
>>>>>>>>     #define TTM_TT_FLAG_EXTERNAL_MAPPABLE    BIT(3)
>>>>>>>>     #define TTM_TT_FLAG_DECRYPTED        BIT(4)
>>>>>>>> +#define TTM_TT_FLAG_CLEARED_ON_FREE    BIT(5)
>>>>>>>>     -#define TTM_TT_FLAG_PRIV_POPULATED    BIT(5)
>>>>>>>> +#define TTM_TT_FLAG_PRIV_POPULATED    BIT(6)
>>>>>>>>         uint32_t page_flags;
>>>>>>>>         /** @num_pages: Number of pages in the page array.
>>>>>>>> */
>>>>>>>>         uint32_t num_pages;
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 8504dbe19c1a..935ab3cfd046 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -222,15 +222,18 @@  static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
 }
 
 /* Give pages into a specific pool_type */
-static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
+static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p,
+			       bool cleared)
 {
 	unsigned int i, num_pages = 1 << pt->order;
 
-	for (i = 0; i < num_pages; ++i) {
-		if (PageHighMem(p))
-			clear_highpage(p + i);
-		else
-			clear_page(page_address(p + i));
+	if (!cleared) {
+		for (i = 0; i < num_pages; ++i) {
+			if (PageHighMem(p))
+				clear_highpage(p + i);
+			else
+				clear_page(page_address(p + i));
+		}
 	}
 
 	spin_lock(&pt->lock);
@@ -394,6 +397,7 @@  static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
 				pgoff_t start_page, pgoff_t end_page)
 {
 	struct page **pages = &tt->pages[start_page];
+	bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE;
 	unsigned int order;
 	pgoff_t i, nr;
 
@@ -407,7 +411,7 @@  static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
 
 		pt = ttm_pool_select_type(pool, caching, order);
 		if (pt)
-			ttm_pool_type_give(pt, *pages);
+			ttm_pool_type_give(pt, *pages, cleared);
 		else
 			ttm_pool_free_page(pool, caching, order, *pages);
 	}
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 2b9d856ff388..cfaf49de2419 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -85,6 +85,9 @@  struct ttm_tt {
 	 * fault handling abuses the DMA api a bit and dma_map_attrs can't be
 	 * used to assure pgprot always matches.
 	 *
+	 * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles
+	 * clearing backing store
+	 *
 	 * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is
 	 * set by TTM after ttm_tt_populate() has successfully returned, and is
 	 * then unset when TTM calls ttm_tt_unpopulate().
@@ -94,8 +97,9 @@  struct ttm_tt {
 #define TTM_TT_FLAG_EXTERNAL		BIT(2)
 #define TTM_TT_FLAG_EXTERNAL_MAPPABLE	BIT(3)
 #define TTM_TT_FLAG_DECRYPTED		BIT(4)
+#define TTM_TT_FLAG_CLEARED_ON_FREE	BIT(5)
 
-#define TTM_TT_FLAG_PRIV_POPULATED	BIT(5)
+#define TTM_TT_FLAG_PRIV_POPULATED	BIT(6)
 	uint32_t page_flags;
 	/** @num_pages: Number of pages in the page array. */
 	uint32_t num_pages;