diff mbox series

[2/4] drm/i915/dsb: Correct DSB command buffer cache coherency settings

Message ID 20231009132204.15098-2-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [1/4] drm/i915/dsb: Allocate command buffer from local memory | expand

Commit Message

Ville Syrjälä Oct. 9, 2023, 1:22 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

The display engine does not snoop the caches so shoukd to mark
the DSB command buffer as I915_CACHE_NONE.
i915_gem_object_create_internal() always gives us I915_CACHE_LLC
on LLC platforms. And to make things 100% correct we should also
clflush at the end, if necessary.

Note that currently this is a non-issue as we always write the
command buffer through a WC mapping, so a cache flush is not actually
needed. But we might actually want to consider a WB mapping since
we also end up reading from the command buffer (in the indexed
reg write handling). Either that or we should do something else
to avoid those reads (might actually be even more sensible on DGFX
since we end up reading over PCIe). But we should measure the overhead
first...

Anyways, no real harm in adding the belts and suspenders here so
that the code will work correctly regardless of how we map the
buffer. If we do get a WC mapping (as we request)
i915_gem_object_flush_map() will be a nop. Well, apart form
a wmb() which may just flush the WC buffer a bit earlier
than would otherwise happen (at the latest the mmio accesses
would trigger the WC flush).

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/display/intel_dsb.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Shankar, Uma Oct. 12, 2023, 9:40 p.m. UTC | #1
> -----Original Message-----
> From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf Of Ville
> Syrjala
> Sent: Monday, October 9, 2023 6:52 PM
> To: intel-gfx@lists.freedesktop.org
> Subject: [Intel-gfx] [PATCH 2/4] drm/i915/dsb: Correct DSB command buffer
> cache coherency settings
> 
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> The display engine does not snoop the caches so shoukd to mark the DSB

Nit: Typo here

I am not sure on the cache behaviour so someone from core can also ack.
To me , looks logically correct.
Reviewed-by: Uma Shankar <uma.shankar@intel.com>

> command buffer as I915_CACHE_NONE.
> i915_gem_object_create_internal() always gives us I915_CACHE_LLC on LLC
> platforms. And to make things 100% correct we should also clflush at the end, if
> necessary.
> 
> Note that currently this is a non-issue as we always write the command buffer
> through a WC mapping, so a cache flush is not actually needed. But we might
> actually want to consider a WB mapping since we also end up reading from the
> command buffer (in the indexed reg write handling). Either that or we should do
> something else to avoid those reads (might actually be even more sensible on
> DGFX since we end up reading over PCIe). But we should measure the overhead
> first...
> 
> Anyways, no real harm in adding the belts and suspenders here so that the code
> will work correctly regardless of how we map the buffer. If we do get a WC
> mapping (as we request)
> i915_gem_object_flush_map() will be a nop. Well, apart form a wmb() which may
> just flush the WC buffer a bit earlier than would otherwise happen (at the latest
> the mmio accesses would trigger the WC flush).
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dsb.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c
> b/drivers/gpu/drm/i915/display/intel_dsb.c
> index 7410ba3126f9..78b6fe24dcd8 100644
> --- a/drivers/gpu/drm/i915/display/intel_dsb.c
> +++ b/drivers/gpu/drm/i915/display/intel_dsb.c
> @@ -316,6 +316,8 @@ void intel_dsb_finish(struct intel_dsb *dsb)
>  				   DSB_FORCE_DEWAKE, 0);
> 
>  	intel_dsb_align_tail(dsb);
> +
> +	i915_gem_object_flush_map(dsb->vma->obj);
>  }
> 
>  static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
> @@ -462,13 +464,18 @@ struct intel_dsb *intel_dsb_prepare(const struct
> intel_crtc_state *crtc_state,
>  	/* ~1 qword per instruction, full cachelines */
>  	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);
> 
> -	if (HAS_LMEM(i915))
> +	if (HAS_LMEM(i915)) {
>  		obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size),
> 
> I915_BO_ALLOC_CONTIGUOUS);
> -	else
> +		if (IS_ERR(obj))
> +			goto out_put_rpm;
> +	} else {
>  		obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size));
> -	if (IS_ERR(obj))
> -		goto out_put_rpm;
> +		if (IS_ERR(obj))
> +			goto out_put_rpm;
> +
> +		i915_gem_object_set_cache_coherency(obj,
> I915_CACHE_NONE);
> +	}
> 
>  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
>  	if (IS_ERR(vma)) {
> --
> 2.41.0
Ville Syrjälä Oct. 13, 2023, 1:48 p.m. UTC | #2
On Thu, Oct 12, 2023 at 09:40:23PM +0000, Shankar, Uma wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-gfx <intel-gfx-bounces@lists.freedesktop.org> On Behalf Of Ville
> > Syrjala
> > Sent: Monday, October 9, 2023 6:52 PM
> > To: intel-gfx@lists.freedesktop.org
> > Subject: [Intel-gfx] [PATCH 2/4] drm/i915/dsb: Correct DSB command buffer
> > cache coherency settings
> > 
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > The display engine does not snoop the caches so shoukd to mark the DSB
> 
> Nit: Typo here
> 
> I am not sure on the cache behaviour so someone from core can also ack.
> To me , looks logically correct.
> Reviewed-by: Uma Shankar <uma.shankar@intel.com>

Thanks. This series is now merged. Fingers crossed DSB will behave
nicely...

> 
> > command buffer as I915_CACHE_NONE.
> > i915_gem_object_create_internal() always gives us I915_CACHE_LLC on LLC
> > platforms. And to make things 100% correct we should also clflush at the end, if
> > necessary.
> > 
> > Note that currently this is a non-issue as we always write the command buffer
> > through a WC mapping, so a cache flush is not actually needed. But we might
> > actually want to consider a WB mapping since we also end up reading from the
> > command buffer (in the indexed reg write handling). Either that or we should do
> > something else to avoid those reads (might actually be even more sensible on
> > DGFX since we end up reading over PCIe). But we should measure the overhead
> > first...
> > 
> > Anyways, no real harm in adding the belts and suspenders here so that the code
> > will work correctly regardless of how we map the buffer. If we do get a WC
> > mapping (as we request)
> > i915_gem_object_flush_map() will be a nop. Well, apart form a wmb() which may
> > just flush the WC buffer a bit earlier than would otherwise happen (at the latest
> > the mmio accesses would trigger the WC flush).
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/display/intel_dsb.c | 15 +++++++++++----
> >  1 file changed, 11 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c
> > b/drivers/gpu/drm/i915/display/intel_dsb.c
> > index 7410ba3126f9..78b6fe24dcd8 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dsb.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dsb.c
> > @@ -316,6 +316,8 @@ void intel_dsb_finish(struct intel_dsb *dsb)
> >  				   DSB_FORCE_DEWAKE, 0);
> > 
> >  	intel_dsb_align_tail(dsb);
> > +
> > +	i915_gem_object_flush_map(dsb->vma->obj);
> >  }
> > 
> >  static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
> > @@ -462,13 +464,18 @@ struct intel_dsb *intel_dsb_prepare(const struct
> > intel_crtc_state *crtc_state,
> >  	/* ~1 qword per instruction, full cachelines */
> >  	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);
> > 
> > -	if (HAS_LMEM(i915))
> > +	if (HAS_LMEM(i915)) {
> >  		obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size),
> > 
> > I915_BO_ALLOC_CONTIGUOUS);
> > -	else
> > +		if (IS_ERR(obj))
> > +			goto out_put_rpm;
> > +	} else {
> >  		obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size));
> > -	if (IS_ERR(obj))
> > -		goto out_put_rpm;
> > +		if (IS_ERR(obj))
> > +			goto out_put_rpm;
> > +
> > +		i915_gem_object_set_cache_coherency(obj,
> > I915_CACHE_NONE);
> > +	}
> > 
> >  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
> >  	if (IS_ERR(vma)) {
> > --
> > 2.41.0
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 7410ba3126f9..78b6fe24dcd8 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -316,6 +316,8 @@  void intel_dsb_finish(struct intel_dsb *dsb)
 				   DSB_FORCE_DEWAKE, 0);
 
 	intel_dsb_align_tail(dsb);
+
+	i915_gem_object_flush_map(dsb->vma->obj);
 }
 
 static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
@@ -462,13 +464,18 @@  struct intel_dsb *intel_dsb_prepare(const struct intel_crtc_state *crtc_state,
 	/* ~1 qword per instruction, full cachelines */
 	size = ALIGN(max_cmds * 8, CACHELINE_BYTES);
 
-	if (HAS_LMEM(i915))
+	if (HAS_LMEM(i915)) {
 		obj = i915_gem_object_create_lmem(i915, PAGE_ALIGN(size),
 						  I915_BO_ALLOC_CONTIGUOUS);
-	else
+		if (IS_ERR(obj))
+			goto out_put_rpm;
+	} else {
 		obj = i915_gem_object_create_internal(i915, PAGE_ALIGN(size));
-	if (IS_ERR(obj))
-		goto out_put_rpm;
+		if (IS_ERR(obj))
+			goto out_put_rpm;
+
+		i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 	if (IS_ERR(vma)) {