diff mbox

[6/8] drm/i915: Make l3 remapping use the ring

Message ID 1379050122-12774-7-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Sept. 13, 2013, 5:28 a.m. UTC
Using LRI (MI_LOAD_REGISTER_IMM) to set the remapping registers allows us to
stream L3 remapping information through the ring. This is necessary to handle
per-context remaps, as we'll see implemented in an upcoming patch.

Using the ring also means we no longer need to toggle the DOP clock gating
bit around the register writes.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h   |  2 +-
 drivers/gpu/drm/i915/i915_gem.c   | 39 +++++++++++++++++----------------------
 drivers/gpu/drm/i915/i915_sysfs.c |  3 ++-
 3 files changed, 20 insertions(+), 24 deletions(-)

Comments

Daniel Vetter Sept. 13, 2013, 4:16 p.m. UTC | #1
On Thu, Sep 12, 2013 at 10:28:32PM -0700, Ben Widawsky wrote:
> Using LRI for setting the remapping registers allows us to stream l3
> remapping information. This is necessary to handle per context remaps as
> we'll see implemented in an upcoming patch.
> 
> Using the ring also means we don't need to frob the DOP clock gating
> bits.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_drv.h   |  2 +-
>  drivers/gpu/drm/i915/i915_gem.c   | 39 +++++++++++++++++----------------------
>  drivers/gpu/drm/i915/i915_sysfs.c |  3 ++-
>  3 files changed, 20 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index eb90461..493a9cd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1950,7 +1950,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
>  int __must_check i915_gem_init(struct drm_device *dev);
>  int __must_check i915_gem_init_hw(struct drm_device *dev);
> -void i915_gem_l3_remap(struct drm_device *dev, int slice);
> +int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice);
>  void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b11f7d6c..fa01c69 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4256,41 +4256,36 @@ i915_gem_idle(struct drm_device *dev)
>  	return 0;
>  }
>  
> -void i915_gem_l3_remap(struct drm_device *dev, int slice)
> +int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
>  {
> +	struct drm_device *dev = ring->dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
>  	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
> -	u32 misccpctl;
> -	int i;
> +	int i, ret;
>  
>  	if (!HAS_L3_GPU_CACHE(dev))
> -		return;
> +		return 0;
>  
>  	if (NUM_L3_SLICES(dev) < 2 && slice)
> -		return;
> +		return 0;
>  
>  	if (!remap_info)
> -		return;
> +		return 0;
>  
> -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -	POSTING_READ(GEN7_MISCCPCTL);
> +	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
> +	if (ret)
> +		return ret;
>  
>  	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
> -		u32 remap = I915_READ(reg_base + i);
> -		if (remap && remap != remap_info[i/4])
> -			DRM_DEBUG("0x%x was already programmed to %x\n",
> -				  reg_base + i, remap);
> -		if (remap && !remap_info[i/4])
> -			DRM_DEBUG_DRIVER("Clearing remapped register\n");
> -		I915_WRITE(reg_base + i, remap_info[i/4]);
> +		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +		intel_ring_emit(ring, reg_base + i);
> +		intel_ring_emit(ring, remap_info[i/4]);

I think a comment would be good here, explaining that on Haswell we never
read back this register range, and hence that it should be safe against
concurrent register access. Or is this not a concern here?
-Daniel

>  	}
>  
> -	/* Make sure all the writes land before disabling dop clock gating */
> -	POSTING_READ(reg_base);
> +	intel_ring_advance(ring);
>  
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +	return ret;
>  }
>  
>  void i915_gem_init_swizzling(struct drm_device *dev)
> @@ -4401,15 +4396,15 @@ i915_gem_init_hw(struct drm_device *dev)
>  		I915_WRITE(GEN7_MSG_CTL, temp);
>  	}
>  
> -	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> -		i915_gem_l3_remap(dev, i);
> -
>  	i915_gem_init_swizzling(dev);
>  
>  	ret = i915_gem_init_rings(dev);
>  	if (ret)
>  		return ret;
>  
> +	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> +		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
> +
>  	/*
>  	 * XXX: There was some w/a described somewhere suggesting loading
>  	 * contexts before PPGTT.
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index d208f2d..65a7274 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -206,7 +206,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
>  
>  	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
>  
> -	i915_gem_l3_remap(drm_dev, slice);
> +	if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
> +		count = 0;
>  
>  	mutex_unlock(&drm_dev->struct_mutex);
>  
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eb90461..493a9cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1950,7 +1950,7 @@  bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-void i915_gem_l3_remap(struct drm_device *dev, int slice);
+int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b11f7d6c..fa01c69 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4256,41 +4256,36 @@  i915_gem_idle(struct drm_device *dev)
 	return 0;
 }
 
-void i915_gem_l3_remap(struct drm_device *dev, int slice)
+int i915_gem_l3_remap(struct intel_ring_buffer *ring, int slice)
 {
+	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
-	u32 misccpctl;
-	int i;
+	int i, ret;
 
 	if (!HAS_L3_GPU_CACHE(dev))
-		return;
+		return 0;
 
 	if (NUM_L3_SLICES(dev) < 2 && slice)
-		return;
+		return 0;
 
 	if (!remap_info)
-		return;
+		return 0;
 
-	misccpctl = I915_READ(GEN7_MISCCPCTL);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-	POSTING_READ(GEN7_MISCCPCTL);
+	ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
-		u32 remap = I915_READ(reg_base + i);
-		if (remap && remap != remap_info[i/4])
-			DRM_DEBUG("0x%x was already programmed to %x\n",
-				  reg_base + i, remap);
-		if (remap && !remap_info[i/4])
-			DRM_DEBUG_DRIVER("Clearing remapped register\n");
-		I915_WRITE(reg_base + i, remap_info[i/4]);
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, reg_base + i);
+		intel_ring_emit(ring, remap_info[i/4]);
 	}
 
-	/* Make sure all the writes land before disabling dop clock gating */
-	POSTING_READ(reg_base);
+	intel_ring_advance(ring);
 
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+	return ret;
 }
 
 void i915_gem_init_swizzling(struct drm_device *dev)
@@ -4401,15 +4396,15 @@  i915_gem_init_hw(struct drm_device *dev)
 		I915_WRITE(GEN7_MSG_CTL, temp);
 	}
 
-	for (i = 0; i < NUM_L3_SLICES(dev); i++)
-		i915_gem_l3_remap(dev, i);
-
 	i915_gem_init_swizzling(dev);
 
 	ret = i915_gem_init_rings(dev);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < NUM_L3_SLICES(dev); i++)
+		i915_gem_l3_remap(&dev_priv->ring[RCS], i);
+
 	/*
 	 * XXX: There was some w/a described somewhere suggesting loading
 	 * contexts before PPGTT.
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index d208f2d..65a7274 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -206,7 +206,8 @@  i915_l3_write(struct file *filp, struct kobject *kobj,
 
 	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
 
-	i915_gem_l3_remap(drm_dev, slice);
+	if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
+		count = 0;
 
 	mutex_unlock(&drm_dev->struct_mutex);