diff mbox

[09/10] drm/i915: flush system agent TLBs on SNB

Message ID 1350956055-3224-10-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Oct. 23, 2012, 1:34 a.m. UTC
This allows us to map the PTEs WC. I've not done thorough testing or
performance measurements with this patch, but it should be decent.

This is based on a patch from Jesse with the original commit message
> I've only lightly tested this so far, but the corruption seems to be
> gone if I write the GFX_FLSH_CNTL reg after binding an object.  This
> register should control the TLB for the system agent, which is what CPU
> mapped objects will go through.

It has been updated for the new AGP-less code by me, and included with
it is feedback from the original patch.

CC: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
 drivers/gpu/drm/i915/i915_reg.h     | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

Comments

Jesse Barnes Oct. 23, 2012, 2:17 p.m. UTC | #1
On Mon, 22 Oct 2012 18:34:14 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> This allows us to map the PTEs WC. I've not done thorough testing or
> performance measurements with this patch, but it should be decent.
> 
> This is based on a patch from Jesse with the original commit message
> > I've only lightly tested this so far, but the corruption seems to be
> > gone if I write the GFX_FLSH_CNTL reg after binding an object.  This
> > register should control the TLB for the system agent, which is what CPU
> > mapped objects will go through.
> 
> It has been updated for the new AGP-less code by me, and included with
> it is feedback from the original patch.
> 
> CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
>  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 16fe960..e5f0a7f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -391,6 +391,7 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
>  	}
>  
>  out:
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
>  	/* XXX: This serves as a posting read preserving the way the old code
>  	 * works. It's not clear if this is strictly necessary or just voodoo
>  	 * based on what I've tried to gather from the docs.
> @@ -593,8 +594,8 @@ int i915_gem_gtt_init(struct drm_device *dev)
>  	if (ret)
>  		return ret;
>  
> -	dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr,
> -					dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> +	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
> +					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
>  
>  	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
>  	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 1d54328..e8c578f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -683,6 +683,8 @@
>  #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
>  #define BB_ADDR		0x02140 /* 8 bytes */
>  #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
> +#define GFX_FLSH_CNTL_GEN6	0x101008
> +#define   GFX_FLSH_CNTL_EN	(1<<0)
>  #define ECOSKPD		0x021d0
>  #define   ECO_GATING_CX_ONLY	(1<<3)
>  #define   ECO_FLIP_DONE		(1<<0)

Looks good.  Has anyone tried this on gen3-5?

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Daniel Vetter Oct. 23, 2012, 2:28 p.m. UTC | #2
On Tue, Oct 23, 2012 at 07:17:53AM -0700, Jesse Barnes wrote:
> On Mon, 22 Oct 2012 18:34:14 -0700
> Ben Widawsky <ben@bwidawsk.net> wrote:
> 
> > This allows us to map the PTEs WC. I've not done thorough testing or
> > performance measurements with this patch, but it should be decent.
> > 
> > This is based on a patch from Jesse with the original commit message
> > > I've only lightly tested this so far, but the corruption seems to be
> > > gone if I write the GFX_FLSH_CNTL reg after binding an object.  This
> > > register should control the TLB for the system agent, which is what CPU
> > > mapped objects will go through.
> > 
> > It has been updated for the new AGP-less code by me, and included with
> > it is feedback from the original patch.
> > 
> > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
> >  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
> >  2 files changed, 5 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 16fe960..e5f0a7f 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -391,6 +391,7 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
> >  	}
> >  
> >  out:
> > +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> >  	/* XXX: This serves as a posting read preserving the way the old code
> >  	 * works. It's not clear if this is strictly necessary or just voodoo
> >  	 * based on what I've tried to gather from the docs.
> > @@ -593,8 +594,8 @@ int i915_gem_gtt_init(struct drm_device *dev)
> >  	if (ret)
> >  		return ret;
> >  
> > -	dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr,
> > -					dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> > +	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
> > +					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> >  
> >  	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
> >  	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index 1d54328..e8c578f 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -683,6 +683,8 @@
> >  #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
> >  #define BB_ADDR		0x02140 /* 8 bytes */
> >  #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
> > +#define GFX_FLSH_CNTL_GEN6	0x101008
> > +#define   GFX_FLSH_CNTL_EN	(1<<0)
> >  #define ECOSKPD		0x021d0
> >  #define   ECO_GATING_CX_ONLY	(1<<3)
> >  #define   ECO_FLIP_DONE		(1<<0)
> 
> Looks good.  Has anyone tried this on gen3-5?

pre-gen6 still use the intel_gtt code, since we need to for the next
decade or so to keep ums (and crappy xvmc on gen3) going.
-Daniel
Jesse Barnes Oct. 23, 2012, 2:35 p.m. UTC | #3
On Tue, 23 Oct 2012 16:28:53 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Tue, Oct 23, 2012 at 07:17:53AM -0700, Jesse Barnes wrote:
> > On Mon, 22 Oct 2012 18:34:14 -0700
> > Ben Widawsky <ben@bwidawsk.net> wrote:
> > 
> > > This allows us to map the PTEs WC. I've not done thorough testing or
> > > performance measurements with this patch, but it should be decent.
> > > 
> > > This is based on a patch from Jesse with the original commit message
> > > > I've only lightly tested this so far, but the corruption seems to be
> > > > gone if I write the GFX_FLSH_CNTL reg after binding an object.  This
> > > > register should control the TLB for the system agent, which is what CPU
> > > > mapped objects will go through.
> > > 
> > > It has been updated for the new AGP-less code by me, and included with
> > > it is feedback from the original patch.
> > > 
> > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
> > >  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
> > >  2 files changed, 5 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > index 16fe960..e5f0a7f 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > @@ -391,6 +391,7 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
> > >  	}
> > >  
> > >  out:
> > > +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> > >  	/* XXX: This serves as a posting read preserving the way the old code
> > >  	 * works. It's not clear if this is strictly necessary or just voodoo
> > >  	 * based on what I've tried to gather from the docs.
> > > @@ -593,8 +594,8 @@ int i915_gem_gtt_init(struct drm_device *dev)
> > >  	if (ret)
> > >  		return ret;
> > >  
> > > -	dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr,
> > > -					dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> > > +	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
> > > +					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> > >  
> > >  	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
> > >  	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > > index 1d54328..e8c578f 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -683,6 +683,8 @@
> > >  #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
> > >  #define BB_ADDR		0x02140 /* 8 bytes */
> > >  #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
> > > +#define GFX_FLSH_CNTL_GEN6	0x101008
> > > +#define   GFX_FLSH_CNTL_EN	(1<<0)
> > >  #define ECOSKPD		0x021d0
> > >  #define   ECO_GATING_CX_ONLY	(1<<3)
> > >  #define   ECO_FLIP_DONE		(1<<0)
> > 
> > Looks good.  Has anyone tried this on gen3-5?
> 
> pre-gen6 still use the intel_gtt code, since we need to for the next
> decade or so to keep ums (and crappy xvmc on gen3) going.

Flush needs to happen somewhere though...
Ben Widawsky Oct. 23, 2012, 2:42 p.m. UTC | #4
On Tue, 23 Oct 2012 07:35:48 -0700
Jesse Barnes <jbarnes@virtuousgeek.org> wrote:

> On Tue, 23 Oct 2012 16:28:53 +0200
> Daniel Vetter <daniel@ffwll.ch> wrote:
> 
> > On Tue, Oct 23, 2012 at 07:17:53AM -0700, Jesse Barnes wrote:
> > > On Mon, 22 Oct 2012 18:34:14 -0700
> > > Ben Widawsky <ben@bwidawsk.net> wrote:
> > > 
> > > > This allows us to map the PTEs WC. I've not done thorough
> > > > testing or performance measurements with this patch, but it
> > > > should be decent.
> > > > 
> > > > This is based on a patch from Jesse with the original commit
> > > > message
> > > > > I've only lightly tested this so far, but the corruption
> > > > > seems to be gone if I write the GFX_FLSH_CNTL reg after
> > > > > binding an object.  This register should control the TLB for
> > > > > the system agent, which is what CPU mapped objects will go
> > > > > through.
> > > > 
> > > > It has been updated for the new AGP-less code by me, and
> > > > included with it is feedback from the original patch.
> > > > 
> > > > CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
> > > >  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
> > > >  2 files changed, 5 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > b/drivers/gpu/drm/i915/i915_gem_gtt.c index 16fe960..e5f0a7f
> > > > 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > @@ -391,6 +391,7 @@ static void gen6_ggtt_bind_object(struct
> > > > drm_i915_gem_object *obj, }
> > > >  
> > > >  out:
> > > > +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> > > >  	/* XXX: This serves as a posting read preserving the
> > > > way the old code
> > > >  	 * works. It's not clear if this is strictly necessary
> > > > or just voodoo
> > > >  	 * based on what I've tried to gather from the docs.
> > > > @@ -593,8 +594,8 @@ int i915_gem_gtt_init(struct drm_device
> > > > *dev) if (ret)
> > > >  		return ret;
> > > >  
> > > > -	dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr,
> > > > -
> > > > dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> > > > +	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
> > > > +
> > > > dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); 
> > > >  	/* GMADR is the PCI aperture used by SW to access
> > > > tiled GFX surfaces in a linear fashion. */
> > > > DRM_DEBUG_DRIVER("GMADR size = %dM\n",
> > > > dev_priv->mm.gtt->gtt_mappable_entries >> 8); diff --git
> > > > a/drivers/gpu/drm/i915/i915_reg.h
> > > > b/drivers/gpu/drm/i915/i915_reg.h index 1d54328..e8c578f 100644
> > > > --- a/drivers/gpu/drm/i915/i915_reg.h +++
> > > > b/drivers/gpu/drm/i915/i915_reg.h @@ -683,6 +683,8 @@ #define
> > > > CM0_RC_OP_FLUSH_DISABLE (1<<0) #define BB_ADDR
> > > > 0x02140 /* 8 bytes */ #define GFX_FLSH_CNTL	0x02170 /*
> > > > 915+ only */ +#define GFX_FLSH_CNTL_GEN6	0x101008
> > > > +#define   GFX_FLSH_CNTL_EN	(1<<0)
> > > >  #define ECOSKPD		0x021d0
> > > >  #define   ECO_GATING_CX_ONLY	(1<<3)
> > > >  #define   ECO_FLIP_DONE		(1<<0)
> > > 
> > > Looks good.  Has anyone tried this on gen3-5?
> > 
> > pre-gen6 still use the intel_gtt code, since we need to for the next
> > decade or so to keep ums (and crappy xvmc on gen3) going.
> 
> Flush needs to happen somewhere though...
> 

I thought pre-gen6 was working already?
Jesse Barnes Oct. 25, 2012, 9:07 p.m. UTC | #5
On Mon, 22 Oct 2012 18:34:14 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> This allows us to map the PTEs WC. I've not done thorough testing or
> performance measurements with this patch, but it should be decent.
> 
> This is based on a patch from Jesse with the original commit message
> > I've only lightly tested this so far, but the corruption seems to be
> > gone if I write the GFX_FLSH_CNTL reg after binding an object.  This
> > register should control the TLB for the system agent, which is what CPU
> > mapped objects will go through.
> 
> It has been updated for the new AGP-less code by me, and included with
> it is feedback from the original patch.
> 
> CC: Jesse Barnes <jbarnes@virtuousgeek.org>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
>  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 16fe960..e5f0a7f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -391,6 +391,7 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
>  	}
>  
>  out:
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
>  	/* XXX: This serves as a posting read preserving the way the old code
>  	 * works. It's not clear if this is strictly necessary or just voodoo
>  	 * based on what I've tried to gather from the docs.
> @@ -593,8 +594,8 @@ int i915_gem_gtt_init(struct drm_device *dev)
>  	if (ret)
>  		return ret;
>  
> -	dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr,
> -					dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
> +	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
> +					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
>  
>  	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
>  	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 1d54328..e8c578f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -683,6 +683,8 @@
>  #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
>  #define BB_ADDR		0x02140 /* 8 bytes */
>  #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
> +#define GFX_FLSH_CNTL_GEN6	0x101008
> +#define   GFX_FLSH_CNTL_EN	(1<<0)
>  #define ECOSKPD		0x021d0
>  #define   ECO_GATING_CX_ONLY	(1<<3)
>  #define   ECO_FLIP_DONE		(1<<0)

Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 16fe960..e5f0a7f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -391,6 +391,7 @@  static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 	}
 
 out:
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	/* XXX: This serves as a posting read preserving the way the old code
 	 * works. It's not clear if this is strictly necessary or just voodoo
 	 * based on what I've tried to gather from the docs.
@@ -593,8 +594,8 @@  int i915_gem_gtt_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr,
-					dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
+	dev_priv->mm.gtt->gtt = ioremap_wc(gtt_bus_addr,
+					   dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t));
 
 	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
 	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1d54328..e8c578f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -683,6 +683,8 @@ 
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
 #define BB_ADDR		0x02140 /* 8 bytes */
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
+#define GFX_FLSH_CNTL_GEN6	0x101008
+#define   GFX_FLSH_CNTL_EN	(1<<0)
 #define ECOSKPD		0x021d0
 #define   ECO_GATING_CX_ONLY	(1<<3)
 #define   ECO_FLIP_DONE		(1<<0)