
[1/2] drm/xe: Fix DSB buffer coherency

Message ID 20240913114754.7956-2-maarten.lankhorst@linux.intel.com (mailing list archive)
State New
Series drm/xe: Re-enable DSB.

Commit Message

Maarten Lankhorst Sept. 13, 2024, 11:47 a.m. UTC
Add the scanout flag to force WC caching, and add the memory barrier
where needed.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
 drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Comments

Matthew Auld Sept. 13, 2024, 12:04 p.m. UTC | #1
On 13/09/2024 12:47, Maarten Lankhorst wrote:
> Add the scanout flag to force WC caching, and add the memory barrier
> where needed.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>   drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> index f99d901a3214f..f7949bf5426af 100644
> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
>   	if (!vma)
>   		return false;
>   
> +	/* Set scanout flag for WC mapping */
>   	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
>   				   NULL, PAGE_ALIGN(size),
>   				   ttm_bo_type_kernel,
>   				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> -				   XE_BO_FLAG_GGTT);
> +				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
>   	if (IS_ERR(obj)) {
>   		kfree(vma);
>   		return false;
> @@ -73,5 +74,5 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
>   
>   void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
>   {
> -	/* TODO: add xe specific flush_map() for dsb buffer object. */
> +	xe_device_wmb(dsb_buf->vma->bo->tile->xe);

Kind of orthogonal, but we could maybe also move the l2 flush here? I 
assume it's better to flush once at the end.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>

>   }
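
As a rough illustration of Matthew's suggestion, folding the L2 flush into the flush_map path might look something like the sketch below. This assumes xe_device_l2_flush() keeps its current signature from xe_device.h; the actual follow-up patch may differ.

void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
	struct xe_device *xe = dsb_buf->vma->bo->tile->xe;

	/* Order all prior WC writes to the DSB buffer. */
	xe_device_wmb(xe);

	/* Flush the device L2 once per commit instead of per dword write. */
	xe_device_l2_flush(xe);
}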
Ville Syrjälä Sept. 13, 2024, 5:12 p.m. UTC | #2
On Fri, Sep 13, 2024 at 01:47:53PM +0200, Maarten Lankhorst wrote:
> Add the scanout flag to force WC caching, and add the memory barrier
> where needed.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> index f99d901a3214f..f7949bf5426af 100644
> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
>  	if (!vma)
>  		return false;
>  
> +	/* Set scanout flag for WC mapping */
>  	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
>  				   NULL, PAGE_ALIGN(size),
>  				   ttm_bo_type_kernel,
>  				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> -				   XE_BO_FLAG_GGTT);
> +				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
>  	if (IS_ERR(obj)) {
>  		kfree(vma);
>  		return false;
> @@ -73,5 +74,5 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
>  
>  void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
>  {
> -	/* TODO: add xe specific flush_map() for dsb buffer object. */
> +	xe_device_wmb(dsb_buf->vma->bo->tile->xe);

MMIO itself should be sufficient to flush the WC buffer.
But I guess no real harm in hammering it a bit harder.

>  }
> -- 
> 2.45.2
Maarten Lankhorst Sept. 13, 2024, 5:58 p.m. UTC | #3
On 2024-09-13 at 14:04, Matthew Auld wrote:
> On 13/09/2024 12:47, Maarten Lankhorst wrote:
>> Add the scanout flag to force WC caching, and add the memory barrier
>> where needed.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>   drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
>>   1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> index f99d901a3214f..f7949bf5426af 100644
>> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
>>       if (!vma)
>>           return false;
>>   +    /* Set scanout flag for WC mapping */
>>       obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
>>                      NULL, PAGE_ALIGN(size),
>>                      ttm_bo_type_kernel,
>>                      XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
>> -                   XE_BO_FLAG_GGTT);
>> +                   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
>>       if (IS_ERR(obj)) {
>>           kfree(vma);
>>           return false;
>> @@ -73,5 +74,5 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
>>     void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
>>   {
>> -    /* TODO: add xe specific flush_map() for dsb buffer object. */
>> +    xe_device_wmb(dsb_buf->vma->bo->tile->xe);
> 
> Kind of orthogonal, but we could maybe also move the l2 flush here? I assume it's better to flush once at the end.
Eww, I didn't see that one. I totally would have moved it if I'd seen it; the number of L2 flushes for a single 4-byte write would otherwise remove any point of using DSB on BMG.

I'll send a followup patch. :)
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
> 
>>   }
Maarten Lankhorst Sept. 13, 2024, 6:44 p.m. UTC | #4
On 2024-09-13 at 19:12, Ville Syrjälä wrote:
> On Fri, Sep 13, 2024 at 01:47:53PM +0200, Maarten Lankhorst wrote:
>> Add the scanout flag to force WC caching, and add the memory barrier
>> where needed.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> ---
>>  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> index f99d901a3214f..f7949bf5426af 100644
>> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
>> @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
>>  	if (!vma)
>>  		return false;
>>  
>> +	/* Set scanout flag for WC mapping */
>>  	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
>>  				   NULL, PAGE_ALIGN(size),
>>  				   ttm_bo_type_kernel,
>>  				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
>> -				   XE_BO_FLAG_GGTT);
>> +				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
>>  	if (IS_ERR(obj)) {
>>  		kfree(vma);
>>  		return false;
>> @@ -73,5 +74,5 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
>>  
>>  void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
>>  {
>> -	/* TODO: add xe specific flush_map() for dsb buffer object. */
>> +	xe_device_wmb(dsb_buf->vma->bo->tile->xe);
> 
> MMIO itself should be sufficient to flush the WC buffer.
> But I guess no real harm in hammering it a bit harder.

You would say that, but I still saw a spurious DSB timeout without the flush. :)

"Memory mapped I/O usually takes place through memory locations that are part of
a window in the CPU's memory space that has different properties assigned than
the usual RAM directed window.

Amongst these properties is usually the fact that such accesses bypass the
caching entirely and go directly to the device buses.  This means MMIO accesses
may, in effect, overtake accesses to cached memory that were emitted earlier."

Since the memory is write combined, the memory barrier itself is sufficient
and no further invalidation is required.

Just the workaround should be fine. The l2 flush should be moved after the mb as well.
I'll do that in a followup patch.

>>  }
>> -- 
>> 2.45.2
>
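
To make the ordering concern concrete, the DSB commit path is roughly the sequence below (simplified, with names taken from intel_dsb.c and this patch; not verbatim kernel code):

	/* 1. CPU fills the DSB command buffer through the WC mapping. */
	intel_dsb_buffer_write(&dsb->dsb_buf, dsb->free_pos++, instruction);

	/*
	 * 2. Drain the CPU write-combining buffer so the buffer contents are
	 *    visible to the hardware before it is started.
	 */
	intel_dsb_buffer_flush_map(&dsb->dsb_buf);	/* now xe_device_wmb() */

	/*
	 * 3. MMIO write that arms the DSB; per the documentation quoted above
	 *    this could otherwise overtake the buffer writes in step 1.
	 */
	intel_de_write_fw(display, DSB_TAIL(pipe, dsb->id), dsb_tail);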
Ville Syrjälä Sept. 16, 2024, 2 p.m. UTC | #5
On Fri, Sep 13, 2024 at 08:44:01PM +0200, Maarten Lankhorst wrote:
> 
> 
> Den 2024-09-13 kl. 19:12, skrev Ville Syrjälä:
> > On Fri, Sep 13, 2024 at 01:47:53PM +0200, Maarten Lankhorst wrote:
> >> Add the scanout flag to force WC caching, and add the memory barrier
> >> where needed.
> >>
> >> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> >> ---
> >>  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
> >>  1 file changed, 3 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> >> index f99d901a3214f..f7949bf5426af 100644
> >> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> >> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> >> @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
> >>  	if (!vma)
> >>  		return false;
> >>  
> >> +	/* Set scanout flag for WC mapping */
> >>  	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
> >>  				   NULL, PAGE_ALIGN(size),
> >>  				   ttm_bo_type_kernel,
> >>  				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> >> -				   XE_BO_FLAG_GGTT);
> >> +				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
> >>  	if (IS_ERR(obj)) {
> >>  		kfree(vma);
> >>  		return false;
> >> @@ -73,5 +74,5 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
> >>  
> >>  void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
> >>  {
> >> -	/* TODO: add xe specific flush_map() for dsb buffer object. */
> >> +	xe_device_wmb(dsb_buf->vma->bo->tile->xe);
> > 
> > MMIO itself should be sufficient to flush the WC buffer.
> > But I guess no real harm in hammering it a bit harder.
> 
> You would say that, I still saw a spurious DSB timeout without the flush. :)
> 
> "Memory mapped I/O usually takes place through memory locations that are part of
> a window in the CPU's memory space that has different properties assigned than
> the usual RAM directed window.
> 
> Amongst these properties is usually the fact that such accesses bypass the
> caching entirely and go directly to the device buses.  This means MMIO accesses
> may, in effect, overtake accesses to cached memory that were emitted earlier."

WC != cached

Any uncached access is supposed to flush the WC buffer,
same as sfence.

Sounds like you have some other issue and the sfence just happens
to work around it somehow. Either that or the CPU is broken.

> 
> Since the memory is write combined, the memory barrier itself is sufficient
> and no further invalidation is required.
> 
> Just the workaround should be fine. The l2 flush should be moved after the mb as well.
> I'll do that in a followup patch.
> 
> >>  }
> >> -- 
> >> 2.45.2
> >
Ville Syrjälä Sept. 17, 2024, 10:15 p.m. UTC | #6
On Fri, Sep 13, 2024 at 01:47:53PM +0200, Maarten Lankhorst wrote:
> Add the scanout flag to force WC caching, and add the memory barrier
> where needed.
> 
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
>  drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> index f99d901a3214f..f7949bf5426af 100644
> --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
> @@ -48,11 +48,12 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
>  	if (!vma)
>  		return false;
>  
> +	/* Set scanout flag for WC mapping */
>  	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
>  				   NULL, PAGE_ALIGN(size),
>  				   ttm_bo_type_kernel,
>  				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> -				   XE_BO_FLAG_GGTT);
> +				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
>  	if (IS_ERR(obj)) {
>  		kfree(vma);
>  		return false;
> @@ -73,5 +74,5 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
>  
>  void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
>  {
> -	/* TODO: add xe specific flush_map() for dsb buffer object. */
> +	xe_device_wmb(dsb_buf->vma->bo->tile->xe);

With some kind of comment added that this may be needed on
!x86 architectures to flush the WC buffer:

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

>  }
> -- 
> 2.45.2
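
A sketch of how the requested comment could read (hypothetical wording only, not the final text):

void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
	/*
	 * The explicit barrier is needed in addition to the MMIO write in
	 * the caller: on !x86 architectures an MMIO access is not guaranteed
	 * to flush the CPU's write-combining buffer, so flush it here.
	 * (Hypothetical wording for the comment Ville asked for.)
	 */
	xe_device_wmb(dsb_buf->vma->bo->tile->xe);
}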

Patch

diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f99d901a3214f..f7949bf5426af 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -48,11 +48,12 @@  bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
 	if (!vma)
 		return false;
 
+	/* Set scanout flag for WC mapping */
 	obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
 				   NULL, PAGE_ALIGN(size),
 				   ttm_bo_type_kernel,
 				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-				   XE_BO_FLAG_GGTT);
+				   XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
 	if (IS_ERR(obj)) {
 		kfree(vma);
 		return false;
@@ -73,5 +74,5 @@  void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
 
 void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
 {
-	/* TODO: add xe specific flush_map() for dsb buffer object. */
+	xe_device_wmb(dsb_buf->vma->bo->tile->xe);
 }