diff mbox series

[2/4] drm/xe: add bind time pat index to xe_bo structure

Message ID 20240130193652.374270-3-juhapekka.heikkila@gmail.com (mailing list archive)
State New, archived
Headers show
Series Enable ccs compressed framebuffers on Xe2 | expand

Commit Message

Juha-Pekka Heikkila Jan. 30, 2024, 7:36 p.m. UTC
Add a bind-time PAT index member to the xe_bo structure and store the
PAT index from xe_vma in xe_bo.

Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++
 drivers/gpu/drm/xe/xe_pt.c       | 22 ++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)

Comments

Matt Roper Jan. 31, 2024, 6:56 p.m. UTC | #1
On Tue, Jan 30, 2024 at 09:36:50PM +0200, Juha-Pekka Heikkila wrote:
> Add BO bind time pat index member to xe_bo structure and store
> pat index from xe_vma to xe_bo.
> 
> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
> ---
>  drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++
>  drivers/gpu/drm/xe/xe_pt.c       | 22 ++++++++++++++++++----
>  2 files changed, 30 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 14ef13b7b421..6d599f1e846b 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -91,6 +91,18 @@ struct xe_bo {
>  
>  	/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
>  		struct list_head vram_userfault_link;
> +
> +	/**
> +	 * @pat_index: The pat index requested when bind this BO
> +	 */
> +	u16 pat_index;
> +
> +	/**
> +	 * @has_sealed_pat_index: The pat index is sealed because this BO is
> +	 * pinned as framebuffer. This is to prevent flipping compression
> +	 * on/off from framebuffers while in use.
> +	 */
> +	bool has_sealed_pat_index;
>  };
>  
>  #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index de1030a47588..c72cb75d993c 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>  	struct dma_fence *fence;
>  	struct invalidation_fence *ifence = NULL;
>  	struct xe_range_fence *rfence;
> +	struct xe_bo *bo = xe_vma_bo(vma);
>  	int err;
>  
>  	bind_pt_update.locked = false;
> -	xe_bo_assert_held(xe_vma_bo(vma));
> +	xe_bo_assert_held(bo);
>  	xe_vm_assert_held(vm);
>  
>  	vm_dbg(&xe_vma_vm(vma)->xe->drm,
> @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>  		return ERR_PTR(-ENOMEM);
>  	}
>  
> +	/*
> +	 * On Xe2 BO which was pinned as framebuffer before with different
> +	 * PAT index cannot be bound with different PAT index. This is
> +	 * to prevent switching CCS on/off from framebuffers on the fly
> +	 * with Xe2.
> +	 */

I haven't been following all the discussion here, but why is switching
it on/off a problem?  On Xe2 can't we just always turn on decompression
(assuming they were 4-tile)?

Even if a content producer puts data into the buffer using a
non-compression PAT index, my understanding is that the FlatCCS metadata
for that part of the buffer still gets updated appropriately (to 0000 or
whatever the code is for "uncompressed block").  If the decompression
bit in PLANE_CTL basically translates to "pay attention to FlatCCS" vs
"ignore FlatCCS" it shouldn't matter whether the data is truly
compressed or not, right?  Since the FlatCCS area that corresponds to a
buffer is still correct even when non-compressed PAT is used (I think),
is there a reason to turn off decompression for 4-tile?

Am I overlooking something?


Matt

> +	if (bo) {
> +		if (bo->has_sealed_pat_index && bo->pat_index != vma->pat_index)
> +			return ERR_PTR(-EINVAL);
> +
> +		bo->pat_index = vma->pat_index;
> +	}
> +
>  	fence = xe_migrate_update_pgtables(tile->migrate,
> -					   vm, xe_vma_bo(vma), q,
> +					   vm, bo, q,
>  					   entries, num_entries,
>  					   syncs, num_syncs,
>  					   &bind_pt_update.base);
> @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>  				   DMA_RESV_USAGE_KERNEL :
>  				   DMA_RESV_USAGE_BOOKKEEP);
>  
> -		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
> -			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
> +		if (!xe_vma_has_no_bo(vma) && !bo->vm)
> +			dma_resv_add_fence(bo->ttm.base.resv, fence,
>  					   DMA_RESV_USAGE_BOOKKEEP);
>  		xe_pt_commit_bind(vma, entries, num_entries, rebind,
>  				  bind_pt_update.locked ? &deferred : NULL);
> -- 
> 2.25.1
>
Juha-Pekka Heikkila Feb. 1, 2024, 2:17 p.m. UTC | #2
On 31.1.2024 20.56, Matt Roper wrote:
> On Tue, Jan 30, 2024 at 09:36:50PM +0200, Juha-Pekka Heikkila wrote:
>> Add BO bind time pat index member to xe_bo structure and store
>> pat index from xe_vma to xe_bo.
>>
>> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
>> ---
>>   drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++
>>   drivers/gpu/drm/xe/xe_pt.c       | 22 ++++++++++++++++++----
>>   2 files changed, 30 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
>> index 14ef13b7b421..6d599f1e846b 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -91,6 +91,18 @@ struct xe_bo {
>>   
>>   	/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
>>   		struct list_head vram_userfault_link;
>> +
>> +	/**
>> +	 * @pat_index: The pat index requested when bind this BO
>> +	 */
>> +	u16 pat_index;
>> +
>> +	/**
>> +	 * @has_sealed_pat_index: The pat index is sealed because this BO is
>> +	 * pinned as framebuffer. This is to prevent flipping compression
>> +	 * on/off from framebuffers while in use.
>> +	 */
>> +	bool has_sealed_pat_index;
>>   };
>>   
>>   #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>> index de1030a47588..c72cb75d993c 100644
>> --- a/drivers/gpu/drm/xe/xe_pt.c
>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>> @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>>   	struct dma_fence *fence;
>>   	struct invalidation_fence *ifence = NULL;
>>   	struct xe_range_fence *rfence;
>> +	struct xe_bo *bo = xe_vma_bo(vma);
>>   	int err;
>>   
>>   	bind_pt_update.locked = false;
>> -	xe_bo_assert_held(xe_vma_bo(vma));
>> +	xe_bo_assert_held(bo);
>>   	xe_vm_assert_held(vm);
>>   
>>   	vm_dbg(&xe_vma_vm(vma)->xe->drm,
>> @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>>   		return ERR_PTR(-ENOMEM);
>>   	}
>>   
>> +	/*
>> +	 * On Xe2 BO which was pinned as framebuffer before with different
>> +	 * PAT index cannot be bound with different PAT index. This is
>> +	 * to prevent switching CCS on/off from framebuffers on the fly
>> +	 * with Xe2.
>> +	 */
> 
> I haven't been following all the discussion here, but why is switching
> it on/off a problem?  On Xe2 can't we just always turn on decompression
> (assuming they were 4-tile)?
> 
> Even if a content producer puts data into the buffer using a
> non-compression PAT index, my understanding is that the FlatCCS metadata
> for that part of the buffer still gets updated appropriately (to 0000 or
> whatever the code is for "uncompressed block").  If the decompression
> bit in PLANE_CTL basically translates to "pay attention to FlatCCS" vs
> "ignore FlatCCS" it shouldn't matter whether the data is truly
> compressed or not, right?  Since the FlatCCS area that corresponds to a
> buffer is still correct even when non-compressed PAT is used (I think),
> is there a reason to turn off decompression for 4-tile?
> 
> Am I overlooking something?

Hi Matt,

you got it correct for the case of tile4. In patch 4/4 of this set I turn
decompression on unconditionally for tile4 for display when on xe2.

Problems come when we're not on tile4 but on linear/x-tile, for which the
display engine doesn't support decompression. The PAT indexes for BOs
are set by user space, so I am not allowed to change them; I can only
refuse a PAT index change for a BO once it has been accepted as a good
configuration for display. has_sealed_pat_index is set when the
framebuffer is pinned.

Decompression for linear and x-tile is marked as not supported, and I
have the sas document for xe2 compression, which says that sw must disable
compression for linear/x-tile.

/Juha-Pekka

> 
>> +	if (bo) {
>> +		if (bo->has_sealed_pat_index && bo->pat_index != vma->pat_index)
>> +			return ERR_PTR(-EINVAL);
>> +
>> +		bo->pat_index = vma->pat_index;
>> +	}
>> +
>>   	fence = xe_migrate_update_pgtables(tile->migrate,
>> -					   vm, xe_vma_bo(vma), q,
>> +					   vm, bo, q,
>>   					   entries, num_entries,
>>   					   syncs, num_syncs,
>>   					   &bind_pt_update.base);
>> @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>>   				   DMA_RESV_USAGE_KERNEL :
>>   				   DMA_RESV_USAGE_BOOKKEEP);
>>   
>> -		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
>> -			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
>> +		if (!xe_vma_has_no_bo(vma) && !bo->vm)
>> +			dma_resv_add_fence(bo->ttm.base.resv, fence,
>>   					   DMA_RESV_USAGE_BOOKKEEP);
>>   		xe_pt_commit_bind(vma, entries, num_entries, rebind,
>>   				  bind_pt_update.locked ? &deferred : NULL);
>> -- 
>> 2.25.1
>>
>
Juha-Pekka Heikkila Feb. 1, 2024, 3:02 p.m. UTC | #3
On 1.2.2024 16.17, Juha-Pekka Heikkila wrote:
> On 31.1.2024 20.56, Matt Roper wrote:
>> On Tue, Jan 30, 2024 at 09:36:50PM +0200, Juha-Pekka Heikkila wrote:
>>> Add BO bind time pat index member to xe_bo structure and store
>>> pat index from xe_vma to xe_bo.
>>>
>>> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
>>> ---
>>>   drivers/gpu/drm/xe/xe_bo_types.h | 12 ++++++++++++
>>>   drivers/gpu/drm/xe/xe_pt.c       | 22 ++++++++++++++++++----
>>>   2 files changed, 30 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h 
>>> b/drivers/gpu/drm/xe/xe_bo_types.h
>>> index 14ef13b7b421..6d599f1e846b 100644
>>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>>> @@ -91,6 +91,18 @@ struct xe_bo {
>>>       /** @vram_userfault_link: Link into 
>>> @mem_access.vram_userfault.list */
>>>           struct list_head vram_userfault_link;
>>> +
>>> +    /**
>>> +     * @pat_index: The pat index requested when bind this BO
>>> +     */
>>> +    u16 pat_index;
>>> +
>>> +    /**
>>> +     * @has_sealed_pat_index: The pat index is sealed because this 
>>> BO is
>>> +     * pinned as framebuffer. This is to prevent flipping compression
>>> +     * on/off from framebuffers while in use.
>>> +     */
>>> +    bool has_sealed_pat_index;
>>>   };
>>>   #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
>>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>>> index de1030a47588..c72cb75d993c 100644
>>> --- a/drivers/gpu/drm/xe/xe_pt.c
>>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>>> @@ -1208,10 +1208,11 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct 
>>> xe_vma *vma, struct xe_exec_queue
>>>       struct dma_fence *fence;
>>>       struct invalidation_fence *ifence = NULL;
>>>       struct xe_range_fence *rfence;
>>> +    struct xe_bo *bo = xe_vma_bo(vma);
>>>       int err;
>>>       bind_pt_update.locked = false;
>>> -    xe_bo_assert_held(xe_vma_bo(vma));
>>> +    xe_bo_assert_held(bo);
>>>       xe_vm_assert_held(vm);
>>>       vm_dbg(&xe_vma_vm(vma)->xe->drm,
>>> @@ -1252,8 +1253,21 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct 
>>> xe_vma *vma, struct xe_exec_queue
>>>           return ERR_PTR(-ENOMEM);
>>>       }
>>> +    /*
>>> +     * On Xe2 BO which was pinned as framebuffer before with different
>>> +     * PAT index cannot be bound with different PAT index. This is
>>> +     * to prevent switching CCS on/off from framebuffers on the fly
>>> +     * with Xe2.
>>> +     */
>>
>> I haven't been following all the discussion here, but why is switching
>> it on/off a problem?  On Xe2 can't we just always turn on decompression
>> (assuming they were 4-tile)?
>>
>> Even if a content producer puts data into the buffer using a
>> non-compression PAT index, my understanding is that the FlatCCS metadata
>> for that part of the buffer still gets updated appropriately (to 0000 or
>> whatever the code is for "uncompressed block").  If the decompression
>> bit in PLANE_CTL basically translates to "pay attention to FlatCCS" vs
>> "ignore FlatCCS" it shouldn't matter whether the data is truly
>> compressed or not, right?  Since the FlatCCS area that corresponds to a
>> buffer is still correct even when non-compressed PAT is used (I think),
>> is there a reason to turn off decompression for 4-tile?
>>
>> Am I overlooking something?
> 
> Hi Matt,
> 
> you got it correct for the case of tile4, on patch 4/4 of this set I put 
>   decompression on unconditionally for tile4 for display when on xe2.
> 
> Problems come when we're not on tile4 but linear/x-tile where display 
> engine doesn't support decompression for these. These PAT indexes for 
> BOs are set by user space so I will not be allowed to change it and can 
> only deny changing pat index for BO if it was already accepted as good 
> configuration for display. That has_sealed_pat_index is set when 
> framebuffer is pinned.
> 
> Decompression for linear and x-tile is marked as not supported and I 
> have sas document for xe2 compression where is said sw must disable 
> compression for linear/x-tile.
> 

I just talked with Ville and we agreed that we could simply drop these
checks. By default everything will be decompressed, and user space will
need to enable compression explicitly. Linear and x-tile will misrender
if they're compressed, and with patch 4/4 from this set tile4 will work
in all cases.

>>
>>> +    if (bo) {
>>> +        if (bo->has_sealed_pat_index && bo->pat_index != 
>>> vma->pat_index)
>>> +            return ERR_PTR(-EINVAL);
>>> +
>>> +        bo->pat_index = vma->pat_index;
>>> +    }
>>> +
>>>       fence = xe_migrate_update_pgtables(tile->migrate,
>>> -                       vm, xe_vma_bo(vma), q,
>>> +                       vm, bo, q,
>>>                          entries, num_entries,
>>>                          syncs, num_syncs,
>>>                          &bind_pt_update.base);
>>> @@ -1287,8 +1301,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct 
>>> xe_vma *vma, struct xe_exec_queue
>>>                      DMA_RESV_USAGE_KERNEL :
>>>                      DMA_RESV_USAGE_BOOKKEEP);
>>> -        if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
>>> -            dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
>>> +        if (!xe_vma_has_no_bo(vma) && !bo->vm)
>>> +            dma_resv_add_fence(bo->ttm.base.resv, fence,
>>>                          DMA_RESV_USAGE_BOOKKEEP);
>>>           xe_pt_commit_bind(vma, entries, num_entries, rebind,
>>>                     bind_pt_update.locked ? &deferred : NULL);
>>> -- 
>>> 2.25.1
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 14ef13b7b421..6d599f1e846b 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -91,6 +91,18 @@  struct xe_bo {
 
 	/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
 		struct list_head vram_userfault_link;
+
+	/**
+	 * @pat_index: The pat index requested when bind this BO
+	 */
+	u16 pat_index;
+
+	/**
+	 * @has_sealed_pat_index: The pat index is sealed because this BO is
+	 * pinned as framebuffer. This is to prevent flipping compression
+	 * on/off from framebuffers while in use.
+	 */
+	bool has_sealed_pat_index;
 };
 
 #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index de1030a47588..c72cb75d993c 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1208,10 +1208,11 @@  __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 	struct dma_fence *fence;
 	struct invalidation_fence *ifence = NULL;
 	struct xe_range_fence *rfence;
+	struct xe_bo *bo = xe_vma_bo(vma);
 	int err;
 
 	bind_pt_update.locked = false;
-	xe_bo_assert_held(xe_vma_bo(vma));
+	xe_bo_assert_held(bo);
 	xe_vm_assert_held(vm);
 
 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
@@ -1252,8 +1253,21 @@  __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 		return ERR_PTR(-ENOMEM);
 	}
 
+	/*
+	 * On Xe2 BO which was pinned as framebuffer before with different
+	 * PAT index cannot be bound with different PAT index. This is
+	 * to prevent switching CCS on/off from framebuffers on the fly
+	 * with Xe2.
+	 */
+	if (bo) {
+		if (bo->has_sealed_pat_index && bo->pat_index != vma->pat_index)
+			return ERR_PTR(-EINVAL);
+
+		bo->pat_index = vma->pat_index;
+	}
+
 	fence = xe_migrate_update_pgtables(tile->migrate,
-					   vm, xe_vma_bo(vma), q,
+					   vm, bo, q,
 					   entries, num_entries,
 					   syncs, num_syncs,
 					   &bind_pt_update.base);
@@ -1287,8 +1301,8 @@  __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 				   DMA_RESV_USAGE_KERNEL :
 				   DMA_RESV_USAGE_BOOKKEEP);
 
-		if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
-			dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence,
+		if (!xe_vma_has_no_bo(vma) && !bo->vm)
+			dma_resv_add_fence(bo->ttm.base.resv, fence,
 					   DMA_RESV_USAGE_BOOKKEEP);
 		xe_pt_commit_bind(vma, entries, num_entries, rebind,
 				  bind_pt_update.locked ? &deferred : NULL);