diff mbox series

[4/4] drm/i915/execlists: Use coherent writes into the context image

Message ID 20180914094215.23649-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/4] drm/i915/execlists: Delay updating ring register state after resume | expand

Commit Message

Chris Wilson Sept. 14, 2018, 9:42 a.m. UTC
That we use a WB mapping for updating the RING_TAIL register inside the
context image even on !llc machines has been a source of consternation
for every reader. It appears to work on bsw+, but it may just have been
that we have been incredibly bad at detecting the errors.

v2: With extra enthusiasm.
v3: Drop force of map type for pinned default_state as by the time we
pin it, the map type is always WB and doesn't conflict with the earlier
use by ce->state.
v4: Transfer engine->default_state from MAP_WC to MAP_WB on creation so
we do not need the MAP_FORCE littered around the backends

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  | 6 ++++++
 drivers/gpu/drm/i915/i915_gem.c  | 4 +++-
 drivers/gpu/drm/i915/i915_perf.c | 3 ++-
 drivers/gpu/drm/i915/intel_lrc.c | 6 ++++--
 4 files changed, 15 insertions(+), 4 deletions(-)

Comments

Ville Syrjälä Sept. 14, 2018, 1:03 p.m. UTC | #1
On Fri, Sep 14, 2018 at 10:42:15AM +0100, Chris Wilson wrote:
> That we use a WB mapping for updating the RING_TAIL register inside the
> context image even on !llc machines has been a source of consternation
> for every reader. It appears to work on bsw+, but it may just have been
> that we have been incredibly bad at detecting the errors.

Presumably it's due to the "all ggtt accesses go through pat[0]" and
we make pat[0] snoop. So presumably the hw should snoop when loading
the context... maybe.

> 
> v2: With extra enthusiasm.
> v3: Drop force of map type for pinned default_state as by the time we
> pin it, the map type is always WB and doesn't conflict with the earlier
> use by ce->state.
> v4: Transfer engine->default_state from MAP_WC to MAP_WB on creation so
> we do not need the MAP_FORCE littered around the backends
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  | 6 ++++++
>  drivers/gpu/drm/i915/i915_gem.c  | 4 +++-
>  drivers/gpu/drm/i915/i915_perf.c | 3 ++-
>  drivers/gpu/drm/i915/intel_lrc.c | 6 ++++--
>  4 files changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bb43e56df197..7d4daa7412f1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3097,6 +3097,12 @@ enum i915_map_type {
>  	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
>  };
>  
> +static inline enum i915_map_type
> +i915_coherent_map_type(struct drm_i915_private *i915)
> +{
> +	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
> +}
> +
>  /**
>   * i915_gem_object_pin_map - return a contiguous mapping of the entire object
>   * @obj: the object to map into kernel address space
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 37353afec66e..d9465bd1a00a 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5426,6 +5426,8 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>  		struct i915_vma *state;
>  		void *vaddr;
>  
> +		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
> +
>  		state = to_intel_context(ctx, engine)->state;
>  		if (!state)
>  			continue;
> @@ -5450,7 +5452,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
>  
>  		/* Check we can acquire the image of the context state */
>  		vaddr = i915_gem_object_pin_map(engine->default_state,
> -						I915_MAP_WB);
> +						I915_MAP_FORCE_WB);
>  		if (IS_ERR(vaddr)) {
>  			err = PTR_ERR(vaddr);
>  			goto err_active;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3d7a052b4cca..664b96bb65a3 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1707,6 +1707,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>  				       const struct i915_oa_config *oa_config)
>  {
>  	struct intel_engine_cs *engine = dev_priv->engine[RCS];
> +	unsigned int map_type = i915_coherent_map_type(dev_priv);
>  	struct i915_gem_context *ctx;
>  	struct i915_request *rq;
>  	int ret;
> @@ -1741,7 +1742,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>  		if (!ce->state)
>  			continue;
>  
> -		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
> +		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
>  		if (IS_ERR(regs))
>  			return PTR_ERR(regs);
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d7fcbba8e982..a51be16ddaac 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1294,7 +1294,7 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
>  	 * on an active context (which by nature is already on the GPU).
>  	 */
>  	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
> -		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> +		err = i915_gem_object_set_to_wc_domain(vma->obj, true);
>  		if (err)
>  			return err;
>  	}
> @@ -1322,7 +1322,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
>  	if (ret)
>  		goto err;
>  
> -	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
> +	vaddr = i915_gem_object_pin_map(ce->state->obj,
> +					i915_coherent_map_type(ctx->i915) |
> +					I915_MAP_OVERRIDE);
>  	if (IS_ERR(vaddr)) {
>  		ret = PTR_ERR(vaddr);
>  		goto unpin_vma;
> -- 
> 2.19.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Sept. 14, 2018, 1:16 p.m. UTC | #2
Quoting Ville Syrjälä (2018-09-14 14:03:35)
> On Fri, Sep 14, 2018 at 10:42:15AM +0100, Chris Wilson wrote:
> > That we use a WB mapping for updating the RING_TAIL register inside the
> > context image even on !llc machines has been a source of consternation
> > for every reader. It appears to work on bsw+, but it may just have been
> > that we have been incredibly bad at detecting the errors.
> 
> Presumably it's due to the "all ggtt accesses go through pat[0]" and
> we make pat[0] snoop. So presumably the hw should snoop when loading
> the context... maybe.

Shows how much attention I pay, I thought we made pat[0] uncached. Seems
strange to suggest that we should always be snooping when reading GGTT
from the GPU.

We still have the same PTE bits for GGTT as for ppGTT, do we not?
-Chris
Ville Syrjälä Sept. 14, 2018, 1:39 p.m. UTC | #3
On Fri, Sep 14, 2018 at 02:16:12PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjälä (2018-09-14 14:03:35)
> > On Fri, Sep 14, 2018 at 10:42:15AM +0100, Chris Wilson wrote:
> > > That we use a WB mapping for updating the RING_TAIL register inside the
> > > context image even on !llc machines has been a source of consternation
> > > for every reader. It appears to work on bsw+, but it may just have been
> > > that we have been incredibly bad at detecting the errors.
> > 
> > Presumably it's due to the "all ggtt accesses go through pat[0]" and
> > we make pat[0] snoop. So presumably the hw should snoop when loading
> > the context... maybe.
> 
> Shows how much attention I pay, I thought we made pat[0] uncached. Seems
> strange to suggest that we should always be snooping when reading GGTT
> from the GPU.

IIRC I did it originally that way for the status page. Sadly it's
either "all snooped" or "none snooped" due to the hw not having
wired up the bits for ggtt.

> 
> We still have the same PTE bits for GGTT as for ppGTT, do we not?

Same, except the pwt/pcd/pat bits do nothing for ggtt.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bb43e56df197..7d4daa7412f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3097,6 +3097,12 @@  enum i915_map_type {
 	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
 };
 
+static inline enum i915_map_type
+i915_coherent_map_type(struct drm_i915_private *i915)
+{
+	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+}
+
 /**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
  * @obj: the object to map into kernel address space
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37353afec66e..d9465bd1a00a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5426,6 +5426,8 @@  static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		struct i915_vma *state;
 		void *vaddr;
 
+		GEM_BUG_ON(to_intel_context(ctx, engine)->pin_count);
+
 		state = to_intel_context(ctx, engine)->state;
 		if (!state)
 			continue;
@@ -5450,7 +5452,7 @@  static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 
 		/* Check we can acquire the image of the context state */
 		vaddr = i915_gem_object_pin_map(engine->default_state,
-						I915_MAP_WB);
+						I915_MAP_FORCE_WB);
 		if (IS_ERR(vaddr)) {
 			err = PTR_ERR(vaddr);
 			goto err_active;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 3d7a052b4cca..664b96bb65a3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1707,6 +1707,7 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 				       const struct i915_oa_config *oa_config)
 {
 	struct intel_engine_cs *engine = dev_priv->engine[RCS];
+	unsigned int map_type = i915_coherent_map_type(dev_priv);
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
 	int ret;
@@ -1741,7 +1742,7 @@  static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 		if (!ce->state)
 			continue;
 
-		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
 		if (IS_ERR(regs))
 			return PTR_ERR(regs);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fcbba8e982..a51be16ddaac 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1294,7 +1294,7 @@  static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
 	 * on an active context (which by nature is already on the GPU).
 	 */
 	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-		err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+		err = i915_gem_object_set_to_wc_domain(vma->obj, true);
 		if (err)
 			return err;
 	}
@@ -1322,7 +1322,9 @@  __execlists_context_pin(struct intel_engine_cs *engine,
 	if (ret)
 		goto err;
 
-	vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+	vaddr = i915_gem_object_pin_map(ce->state->obj,
+					i915_coherent_map_type(ctx->i915) |
+					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
 		goto unpin_vma;