diff mbox

[3/4] drm/i915: Insert a full mb() before reading the seqno from the status page

Message ID 1349807080-9005-3-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Oct. 9, 2012, 6:24 p.m. UTC
Hopefully this will reduce a few of the missed IRQ warnings.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |    8 +++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |    2 --
 2 files changed, 7 insertions(+), 3 deletions(-)

Comments

Jesse Barnes Oct. 11, 2012, 7:46 p.m. UTC | #1
On Tue,  9 Oct 2012 19:24:39 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> Hopefully this will reduce a few of the missed IRQ warnings.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c |    8 +++++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 --
>  2 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index e069e69..133beb6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  	/* Workaround to force correct ordering between irq and seqno writes on
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
>  	 * ACTHD) before reading the status page. */
> -	if (!lazy_coherency)
> +	if (!lazy_coherency) {
>  		intel_ring_get_active_head(ring);
> +		mb();
> +	}
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
>  static u32
>  ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  {
> +	if (!lazy_coherency)
> +		mb();
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
>  }
>  
> @@ -719,6 +723,8 @@ static u32
>  pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
>  {
>  	struct pipe_control *pc = ring->private;
> +	if (!lazy_coherency)
> +		mb();
>  	return pc->cpu_page[0];
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2ea7a31..40b252e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -160,8 +160,6 @@ static inline u32
>  intel_read_status_page(struct intel_ring_buffer *ring,
>  		       int reg)
>  {
> -	/* Ensure that the compiler doesn't optimize away the load. */
> -	barrier();
>  	return ring->status_page.page_addr[reg];
>  }
>  

This looks a bit more voodoo-y.  Theoretically an mb() on the CPU side
should have nothing to do with what the GPU just wrote to the status
page.  It'll slow down the read a bit but shouldn't affect coherence at
all...  An MMIO read from the GPU otoh should flush any stubborn DMA
buffers.
Chris Wilson Oct. 19, 2012, 8:40 p.m. UTC | #2
On Thu, 11 Oct 2012 12:46:00 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Tue,  9 Oct 2012 19:24:39 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > Hopefully this will reduce a few of the missed IRQ warnings.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.c |    8 +++++++-
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 --
> >  2 files changed, 7 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index e069e69..133beb6 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> >  	/* Workaround to force correct ordering between irq and seqno writes on
> >  	 * ivb (and maybe also on snb) by reading from a CS register (like
> >  	 * ACTHD) before reading the status page. */
> > -	if (!lazy_coherency)
> > +	if (!lazy_coherency) {
> >  		intel_ring_get_active_head(ring);
> > +		mb();
> > +	}
> >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> >  }
> >  
> >  static u32
> >  ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> >  {
> > +	if (!lazy_coherency)
> > +		mb();
> >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> >  }
> >  
> > @@ -719,6 +723,8 @@ static u32
> >  pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> >  {
> >  	struct pipe_control *pc = ring->private;
> > +	if (!lazy_coherency)
> > +		mb();
> >  	return pc->cpu_page[0];
> >  }
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index 2ea7a31..40b252e 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -160,8 +160,6 @@ static inline u32
> >  intel_read_status_page(struct intel_ring_buffer *ring,
> >  		       int reg)
> >  {
> > -	/* Ensure that the compiler doesn't optimize away the load. */
> > -	barrier();
> >  	return ring->status_page.page_addr[reg];
> >  }
> >  
> 
> This looks a bit more voodoo-y.  Theoretically an mb() on the CPU side
> should have nothing to do with what the GPU just wrote to the status
> page.  It'll slow down the read a bit but shouldn't affect coherence at
> all...  An MMIO read from the GPU otoh should flush any stubborn DMA
> buffers.

Absolutely convinced? Aren't we here more worried about the view of the
shared cache from any particular core and so need to treat this as an
SMP programming problem, in which case we do need to worry about memory
barriers around dependent reads and writes between processors?

But it is definitely more voodoo...
-Chris
Jesse Barnes Oct. 19, 2012, 8:52 p.m. UTC | #3
On Fri, 19 Oct 2012 21:40:17 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Thu, 11 Oct 2012 12:46:00 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > On Tue,  9 Oct 2012 19:24:39 +0100
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > 
> > > Hopefully this will reduce a few of the missed IRQ warnings.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/intel_ringbuffer.c |    8 +++++++-
> > >  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 --
> > >  2 files changed, 7 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > > index e069e69..133beb6 100644
> > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> > >  	/* Workaround to force correct ordering between irq and seqno writes on
> > >  	 * ivb (and maybe also on snb) by reading from a CS register (like
> > >  	 * ACTHD) before reading the status page. */
> > > -	if (!lazy_coherency)
> > > +	if (!lazy_coherency) {
> > >  		intel_ring_get_active_head(ring);
> > > +		mb();
> > > +	}
> > >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> > >  }
> > >  
> > >  static u32
> > >  ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> > >  {
> > > +	if (!lazy_coherency)
> > > +		mb();
> > >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> > >  }
> > >  
> > > @@ -719,6 +723,8 @@ static u32
> > >  pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> > >  {
> > >  	struct pipe_control *pc = ring->private;
> > > +	if (!lazy_coherency)
> > > +		mb();
> > >  	return pc->cpu_page[0];
> > >  }
> > >  
> > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > index 2ea7a31..40b252e 100644
> > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > @@ -160,8 +160,6 @@ static inline u32
> > >  intel_read_status_page(struct intel_ring_buffer *ring,
> > >  		       int reg)
> > >  {
> > > -	/* Ensure that the compiler doesn't optimize away the load. */
> > > -	barrier();
> > >  	return ring->status_page.page_addr[reg];
> > >  }
> > >  
> > 
> > This looks a bit more voodoo-y.  Theoretically an mb() on the CPU side
> > should have nothing to do with what the GPU just wrote to the status
> > page.  It'll slow down the read a bit but shouldn't affect coherence at
> > all...  An MMIO read from the GPU otoh should flush any stubborn DMA
> > buffers.
> 
> Absolutely convinced? Aren't we here more worried about the view of the
> shared cache from any particular core and so need to treat this as an
> SMP programming problem, in which case we do need to worry about memory
> barriers around dependent reads and writes between processors?
> 
> But it is definitely more voodoo...

If it's an SMP issue, barriers won't help; we need actual
synchronization in the form of locks or something.

My current theory is that while cached and uncached memory accesses are
strongly ordered (i.e. appear in program order from a given CPU), they
don't necessarily synchronize that way against each other, especially
when WC mappings are in play.  So in those cases, fences will be safer
to use before any subsequent access that depends on a previous access
of a different type.
Chris Wilson Jan. 19, 2013, 12:02 p.m. UTC | #4
On Fri, 19 Oct 2012 13:52:49 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Fri, 19 Oct 2012 21:40:17 +0100
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > On Thu, 11 Oct 2012 12:46:00 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > > On Tue,  9 Oct 2012 19:24:39 +0100
> > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > 
> > > > Hopefully this will reduce a few of the missed IRQ warnings.
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_ringbuffer.c |    8 +++++++-
> > > >  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 --
> > > >  2 files changed, 7 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > > > index e069e69..133beb6 100644
> > > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > > > @@ -704,14 +704,18 @@ gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> > > >  	/* Workaround to force correct ordering between irq and seqno writes on
> > > >  	 * ivb (and maybe also on snb) by reading from a CS register (like
> > > >  	 * ACTHD) before reading the status page. */
> > > > -	if (!lazy_coherency)
> > > > +	if (!lazy_coherency) {
> > > >  		intel_ring_get_active_head(ring);
> > > > +		mb();
> > > > +	}
> > > >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> > > >  }
> > > >  
> > > >  static u32
> > > >  ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> > > >  {
> > > > +	if (!lazy_coherency)
> > > > +		mb();
> > > >  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> > > >  }
> > > >  
> > > > @@ -719,6 +723,8 @@ static u32
> > > >  pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> > > >  {
> > > >  	struct pipe_control *pc = ring->private;
> > > > +	if (!lazy_coherency)
> > > > +		mb();
> > > >  	return pc->cpu_page[0];
> > > >  }
> > > >  
> > > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > > index 2ea7a31..40b252e 100644
> > > > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > > > @@ -160,8 +160,6 @@ static inline u32
> > > >  intel_read_status_page(struct intel_ring_buffer *ring,
> > > >  		       int reg)
> > > >  {
> > > > -	/* Ensure that the compiler doesn't optimize away the load. */
> > > > -	barrier();
> > > >  	return ring->status_page.page_addr[reg];
> > > >  }
> > > >  
> > > 
> > > This looks a bit more voodoo-y.  Theoretically an mb() on the CPU side
> > > should have nothing to do with what the GPU just wrote to the status
> > > page.  It'll slow down the read a bit but shouldn't affect coherence at
> > > all...  An MMIO read from the GPU otoh should flush any stubborn DMA
> > > buffers.
> > 
> > Absolutely convinced? Aren't we here more worried about the view of the
> > shared cache from any particular core and so need to treat this as an
> > SMP programming problem, in which case we do need to worry about memory
> > barriers around dependent reads and writes between processors?
> > 
> > But it is definitely more voodoo...
> 
> If it's an SMP issue, barriers won't help; we need actual
> synchronization in the form of locks or something.

Glad you agree. How are locks implemented? :)

Irrespectively of the contentious patches Daniel thought would be a good
idea, we need the first 2 to fix the mb() around fences. Poke.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e069e69..133beb6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -704,14 +704,18 @@  gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
 	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency)
+	if (!lazy_coherency) {
 		intel_ring_get_active_head(ring);
+		mb();
+	}
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
 static u32
 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
+	if (!lazy_coherency)
+		mb();
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
@@ -719,6 +723,8 @@  static u32
 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
 	struct pipe_control *pc = ring->private;
+	if (!lazy_coherency)
+		mb();
 	return pc->cpu_page[0];
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2ea7a31..40b252e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -160,8 +160,6 @@  static inline u32
 intel_read_status_page(struct intel_ring_buffer *ring,
 		       int reg)
 {
-	/* Ensure that the compiler doesn't optimize away the load. */
-	barrier();
 	return ring->status_page.page_addr[reg];
 }