diff mbox

[8/8] drm/i915: Do remaps for all contexts

Message ID 1379050122-12774-9-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Sept. 13, 2013, 5:28 a.m. UTC
On both Ivybridge and Haswell, row remapping information is saved and
restored with context. This means, we never actually properly supported
the l3 remapping because our sysfs interface is asynchronous (and not
tied to any context), and the known faulty HW would be reused by the
next context to run.

Not that due to the asynchronous nature of the sysfs entry, there is no
point modifying the registers for the existing context. Instead we set a
flag for all contexts to load the correct remapping information on the
next run. Interested clients can use debugfs to determine whether or not
the row has been remapped.

One could propose at this point that we just do the remapping in the
kernel. I guess since we have to maintain the sysfs interface anyway,
I'm not sure how useful it is, and I do like keeping the policy in
userspace; (it wasn't my original decision to make the
interface the way it is, so I'm not attached).

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  8 +++++++
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 17 +++++++++++++--
 drivers/gpu/drm/i915/i915_sysfs.c       | 38 +++++++++++----------------------
 4 files changed, 36 insertions(+), 28 deletions(-)

Comments

Ville Syrjala Sept. 13, 2013, 9:17 a.m. UTC | #1
On Thu, Sep 12, 2013 at 10:28:34PM -0700, Ben Widawsky wrote:
> On both Ivybridge and Haswell, row remapping information is saved and
> restored with context. This means, we never actually properly supported
> the l3 remapping because our sysfs interface is asynchronous (and not
> tied to any context), and the known faulty HW would be reused by the
> next context to run.
> 
> Not that due to the asynchronous nature of the sysfs entry, there is no
> point modifying the registers for the existing context. Instead we set a
> flag for all contexts to load the correct remapping information on the
> next run. Interested clients can use debugfs to determine whether or not
> the row has been remapped.
> 
> One could propose at this point that we just do the remapping in the
> kernel. I guess since we have to maintain the sysfs interface anyway,
> I'm not sure how useful it is, and I do like keeping the policy in
> userspace; (it wasn't my original decision to make the
> interface the way it is, so I'm not attached).
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |  8 +++++++
>  drivers/gpu/drm/i915/i915_drv.h         |  1 +
>  drivers/gpu/drm/i915/i915_gem_context.c | 17 +++++++++++++--
>  drivers/gpu/drm/i915/i915_sysfs.c       | 38 +++++++++++----------------------
>  4 files changed, 36 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ada0950..80bed69 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -145,6 +145,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  		seq_printf(m, " (%s)", obj->ring->name);
>  }
>  
> +static void describe_ctx(struct seq_file *m, struct i915_hw_context *ctx)
> +{
> +	seq_putc(m, ctx->is_initialized ? 'I' : 'i');
> +	seq_putc(m, ctx->remap_slice ? 'R' : 'r');
> +	seq_putc(m, ' ');
> +}
> +
>  static int i915_gem_object_list_info(struct seq_file *m, void *data)
>  {
>  	struct drm_info_node *node = (struct drm_info_node *) m->private;
> @@ -1463,6 +1470,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
>  
>  	list_for_each_entry(ctx, &dev_priv->context_list, link) {
>  		seq_puts(m, "HW context ");
> +		describe_ctx(m, ctx);
>  		for_each_ring(ring, dev_priv, i)
>  			if (ring->default_context == ctx)
>  				seq_printf(m, "(default context %s) ", ring->name);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 68f992b..6ba78cd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -602,6 +602,7 @@ struct i915_hw_context {
>  	struct kref ref;
>  	int id;
>  	bool is_initialized;
> +	uint8_t remap_slice;
>  	struct drm_i915_file_private *file_priv;
>  	struct intel_ring_buffer *ring;
>  	struct drm_i915_gem_object *obj;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 2bbdce8..72b7202 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -140,7 +140,7 @@ create_hw_context(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct i915_hw_context *ctx;
> -	int ret;
> +	int ret, i;
>  
>  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>  	if (ctx == NULL)
> @@ -181,6 +181,8 @@ create_hw_context(struct drm_device *dev,
>  
>  	ctx->file_priv = file_priv;
>  	ctx->id = ret;
> +	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> +		ctx->remap_slice |= 1 << 1;
                                         ^
i

>  
>  	return ctx;
>  
> @@ -396,7 +398,7 @@ static int do_switch(struct i915_hw_context *to)
>  	struct intel_ring_buffer *ring = to->ring;
>  	struct i915_hw_context *from = ring->last_context;
>  	u32 hw_flags = 0;
> -	int ret;
> +	int ret, i;
>  
>  	BUG_ON(from != NULL && from->obj != NULL && from->obj->pin_count == 0);
>  
> @@ -432,6 +434,17 @@ static int do_switch(struct i915_hw_context *to)
>  		return ret;
>  	}
>  
> +	for (i = 0; i < MAX_L3_SLICES; i++) {
> +		if (!(to->remap_slice & (1<<i)))
> +			continue;
> +
> +		ret = i915_gem_l3_remap(ring, i);
> +		if (!ret) {
> +			to->remap_slice &= ~(1<<i);
> +		}
> +		/* If it failed, try again next round */
> +	}
> +
>  	/* The backing object for the context is done after switching to the
>  	 * *next* context. Therefore we cannot retire the previous context until
>  	 * the next context has already started running. In fact, the below code
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 65a7274..f0d7e1c 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -118,9 +118,8 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
>  	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
>  	struct drm_device *drm_dev = dminor->dev;
>  	struct drm_i915_private *dev_priv = drm_dev->dev_private;
> -	uint32_t misccpctl;
>  	int slice = (int)(uintptr_t)attr->private;
> -	int i, ret;
> +	int ret;
>  
>  	count = round_down(count, 4);
>  
> @@ -134,31 +133,16 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
>  	if (ret)
>  		return ret;
>  
> -	if (IS_HASWELL(drm_dev)) {
> -		int last = min_t(int, GEN7_L3LOG_SIZE, count + offset);
> -		if ((!dev_priv->l3_parity.remap_info[slice]))
> -			memset(buf + offset, 0, last - offset);
> -		else
> -			memcpy(buf + offset,
> -			       dev_priv->l3_parity.remap_info[slice] + (offset/4),
> -			       last - offset);
> -
> -		i = last;
> -		goto out;
> -	}
> -
> -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -
> -	for (i = 0; i < count; i += 4)
> -		*((uint32_t *)(&buf[i])) = I915_READ(GEN7_L3LOG_BASE + offset + i);
> -
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +	if ((!dev_priv->l3_parity.remap_info[slice]))
> +		memset(buf + offset, 0, count);
> +	else
> +		memcpy(buf + offset,
> +		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
> +		       count);

Needs fixing after patch 4 is fixed.

>  
> -out:
>  	mutex_unlock(&drm_dev->struct_mutex);
>  
> -	return i;
> +	return count;
>  }
>  
>  static ssize_t
> @@ -170,6 +154,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
>  	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
>  	struct drm_device *drm_dev = dminor->dev;
>  	struct drm_i915_private *dev_priv = drm_dev->dev_private;
> +	struct i915_hw_context *ctx;
>  	u32 *temp = NULL; /* Just here to make handling failures easy */
>  	int slice = (int)(uintptr_t)attr->private;
>  	int ret;
> @@ -206,8 +191,9 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
>  
>  	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
>  
> -	if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
> -		count = 0;
> +	/* NB: We defer the remapping until we switch to the context */
> +	list_for_each_entry(ctx, &dev_priv->context_list, link)
> +		ctx->remap_slice |= (1<<slice);

Should we force a context switch on the next batch if the current
context has remap_slice != 0?

Also what happens if hw_contexts_disabled==false?

>  
>  	mutex_unlock(&drm_dev->struct_mutex);
>  
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ville Syrjala Sept. 13, 2013, 9:20 a.m. UTC | #2
On Fri, Sep 13, 2013 at 12:17:17PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 12, 2013 at 10:28:34PM -0700, Ben Widawsky wrote:
> > On both Ivybridge and Haswell, row remapping information is saved and
> > restored with context. This means, we never actually properly supported
> > the l3 remapping because our sysfs interface is asynchronous (and not
> > tied to any context), and the known faulty HW would be reused by the
> > next context to run.
> > 
> > Not that due to the asynchronous nature of the sysfs entry, there is no
> > point modifying the registers for the existing context. Instead we set a
> > flag for all contexts to load the correct remapping information on the
> > next run. Interested clients can use debugfs to determine whether or not
> > the row has been remapped.
> > 
> > One could propose at this point that we just do the remapping in the
> > kernel. I guess since we have to maintain the sysfs interface anyway,
> > I'm not sure how useful it is, and I do like keeping the policy in
> > userspace; (it wasn't my original decision to make the
> > interface the way it is, so I'm not attached).
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c     |  8 +++++++
> >  drivers/gpu/drm/i915/i915_drv.h         |  1 +
> >  drivers/gpu/drm/i915/i915_gem_context.c | 17 +++++++++++++--
> >  drivers/gpu/drm/i915/i915_sysfs.c       | 38 +++++++++++----------------------
> >  4 files changed, 36 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index ada0950..80bed69 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -145,6 +145,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> >  		seq_printf(m, " (%s)", obj->ring->name);
> >  }
> >  
> > +static void describe_ctx(struct seq_file *m, struct i915_hw_context *ctx)
> > +{
> > +	seq_putc(m, ctx->is_initialized ? 'I' : 'i');
> > +	seq_putc(m, ctx->remap_slice ? 'R' : 'r');
> > +	seq_putc(m, ' ');
> > +}
> > +
> >  static int i915_gem_object_list_info(struct seq_file *m, void *data)
> >  {
> >  	struct drm_info_node *node = (struct drm_info_node *) m->private;
> > @@ -1463,6 +1470,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
> >  
> >  	list_for_each_entry(ctx, &dev_priv->context_list, link) {
> >  		seq_puts(m, "HW context ");
> > +		describe_ctx(m, ctx);
> >  		for_each_ring(ring, dev_priv, i)
> >  			if (ring->default_context == ctx)
> >  				seq_printf(m, "(default context %s) ", ring->name);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 68f992b..6ba78cd 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -602,6 +602,7 @@ struct i915_hw_context {
> >  	struct kref ref;
> >  	int id;
> >  	bool is_initialized;
> > +	uint8_t remap_slice;
> >  	struct drm_i915_file_private *file_priv;
> >  	struct intel_ring_buffer *ring;
> >  	struct drm_i915_gem_object *obj;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 2bbdce8..72b7202 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -140,7 +140,7 @@ create_hw_context(struct drm_device *dev,
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct i915_hw_context *ctx;
> > -	int ret;
> > +	int ret, i;
> >  
> >  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> >  	if (ctx == NULL)
> > @@ -181,6 +181,8 @@ create_hw_context(struct drm_device *dev,
> >  
> >  	ctx->file_priv = file_priv;
> >  	ctx->id = ret;
> > +	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> > +		ctx->remap_slice |= 1 << 1;
>                                          ^
> i
> 
> >  
> >  	return ctx;
> >  
> > @@ -396,7 +398,7 @@ static int do_switch(struct i915_hw_context *to)
> >  	struct intel_ring_buffer *ring = to->ring;
> >  	struct i915_hw_context *from = ring->last_context;
> >  	u32 hw_flags = 0;
> > -	int ret;
> > +	int ret, i;
> >  
> >  	BUG_ON(from != NULL && from->obj != NULL && from->obj->pin_count == 0);
> >  
> > @@ -432,6 +434,17 @@ static int do_switch(struct i915_hw_context *to)
> >  		return ret;
> >  	}
> >  
> > +	for (i = 0; i < MAX_L3_SLICES; i++) {
> > +		if (!(to->remap_slice & (1<<i)))
> > +			continue;
> > +
> > +		ret = i915_gem_l3_remap(ring, i);
> > +		if (!ret) {
> > +			to->remap_slice &= ~(1<<i);
> > +		}
> > +		/* If it failed, try again next round */
> > +	}
> > +
> >  	/* The backing object for the context is done after switching to the
> >  	 * *next* context. Therefore we cannot retire the previous context until
> >  	 * the next context has already started running. In fact, the below code
> > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> > index 65a7274..f0d7e1c 100644
> > --- a/drivers/gpu/drm/i915/i915_sysfs.c
> > +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> > @@ -118,9 +118,8 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
> >  	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
> >  	struct drm_device *drm_dev = dminor->dev;
> >  	struct drm_i915_private *dev_priv = drm_dev->dev_private;
> > -	uint32_t misccpctl;
> >  	int slice = (int)(uintptr_t)attr->private;
> > -	int i, ret;
> > +	int ret;
> >  
> >  	count = round_down(count, 4);
> >  
> > @@ -134,31 +133,16 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
> >  	if (ret)
> >  		return ret;
> >  
> > -	if (IS_HASWELL(drm_dev)) {
> > -		int last = min_t(int, GEN7_L3LOG_SIZE, count + offset);
> > -		if ((!dev_priv->l3_parity.remap_info[slice]))
> > -			memset(buf + offset, 0, last - offset);
> > -		else
> > -			memcpy(buf + offset,
> > -			       dev_priv->l3_parity.remap_info[slice] + (offset/4),
> > -			       last - offset);
> > -
> > -		i = last;
> > -		goto out;
> > -	}
> > -
> > -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > -
> > -	for (i = 0; i < count; i += 4)
> > -		*((uint32_t *)(&buf[i])) = I915_READ(GEN7_L3LOG_BASE + offset + i);
> > -
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +	if ((!dev_priv->l3_parity.remap_info[slice]))
> > +		memset(buf + offset, 0, count);
> > +	else
> > +		memcpy(buf + offset,
> > +		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
> > +		       count);
> 
> Needs fixing after patch 4 is fixed.
> 
> >  
> > -out:
> >  	mutex_unlock(&drm_dev->struct_mutex);
> >  
> > -	return i;
> > +	return count;
> >  }
> >  
> >  static ssize_t
> > @@ -170,6 +154,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
> >  	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
> >  	struct drm_device *drm_dev = dminor->dev;
> >  	struct drm_i915_private *dev_priv = drm_dev->dev_private;
> > +	struct i915_hw_context *ctx;
> >  	u32 *temp = NULL; /* Just here to make handling failures easy */
> >  	int slice = (int)(uintptr_t)attr->private;
> >  	int ret;
> > @@ -206,8 +191,9 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
> >  
> >  	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
> >  
> > -	if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
> > -		count = 0;
> > +	/* NB: We defer the remapping until we switch to the context */
> > +	list_for_each_entry(ctx, &dev_priv->context_list, link)
> > +		ctx->remap_slice |= (1<<slice);
> 
> Should we force a context switch on the next batch if the current
> context has remap_slice != 0?
> 
> Also what happens if hw_contexts_disabled==false?

Argh. I mean true of course. Brain has regressed and no longer
understands a single "disabled" boolean.

> 
> >  
> >  	mutex_unlock(&drm_dev->struct_mutex);
> >  
> > -- 
> > 1.8.4
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Ville Syrjälä
> Intel OTC
Ben Widawsky Sept. 17, 2013, 8:42 p.m. UTC | #3
On Fri, Sep 13, 2013 at 12:17:17PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 12, 2013 at 10:28:34PM -0700, Ben Widawsky wrote:
> > On both Ivybridge and Haswell, row remapping information is saved and
> > restored with context. This means, we never actually properly supported
> > the l3 remapping because our sysfs interface is asynchronous (and not
> > tied to any context), and the known faulty HW would be reused by the
> > next context to run.
> > 
> > Not that due to the asynchronous nature of the sysfs entry, there is no
> > point modifying the registers for the existing context. Instead we set a
> > flag for all contexts to load the correct remapping information on the
> > next run. Interested clients can use debugfs to determine whether or not
> > the row has been remapped.
> > 
> > One could propose at this point that we just do the remapping in the
> > kernel. I guess since we have to maintain the sysfs interface anyway,
> > I'm not sure how useful it is, and I do like keeping the policy in
> > userspace; (it wasn't my original decision to make the
> > interface the way it is, so I'm not attached).
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c     |  8 +++++++
> >  drivers/gpu/drm/i915/i915_drv.h         |  1 +
> >  drivers/gpu/drm/i915/i915_gem_context.c | 17 +++++++++++++--
> >  drivers/gpu/drm/i915/i915_sysfs.c       | 38 +++++++++++----------------------
> >  4 files changed, 36 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index ada0950..80bed69 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -145,6 +145,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> >  		seq_printf(m, " (%s)", obj->ring->name);
> >  }
> >  
> > +static void describe_ctx(struct seq_file *m, struct i915_hw_context *ctx)
> > +{
> > +	seq_putc(m, ctx->is_initialized ? 'I' : 'i');
> > +	seq_putc(m, ctx->remap_slice ? 'R' : 'r');
> > +	seq_putc(m, ' ');
> > +}
> > +
> >  static int i915_gem_object_list_info(struct seq_file *m, void *data)
> >  {
> >  	struct drm_info_node *node = (struct drm_info_node *) m->private;
> > @@ -1463,6 +1470,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
> >  
> >  	list_for_each_entry(ctx, &dev_priv->context_list, link) {
> >  		seq_puts(m, "HW context ");
> > +		describe_ctx(m, ctx);
> >  		for_each_ring(ring, dev_priv, i)
> >  			if (ring->default_context == ctx)
> >  				seq_printf(m, "(default context %s) ", ring->name);
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 68f992b..6ba78cd 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -602,6 +602,7 @@ struct i915_hw_context {
> >  	struct kref ref;
> >  	int id;
> >  	bool is_initialized;
> > +	uint8_t remap_slice;
> >  	struct drm_i915_file_private *file_priv;
> >  	struct intel_ring_buffer *ring;
> >  	struct drm_i915_gem_object *obj;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 2bbdce8..72b7202 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -140,7 +140,7 @@ create_hw_context(struct drm_device *dev,
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	struct i915_hw_context *ctx;
> > -	int ret;
> > +	int ret, i;
> >  
> >  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> >  	if (ctx == NULL)
> > @@ -181,6 +181,8 @@ create_hw_context(struct drm_device *dev,
> >  
> >  	ctx->file_priv = file_priv;
> >  	ctx->id = ret;
> > +	for (i = 0; i < NUM_L3_SLICES(dev); i++)
> > +		ctx->remap_slice |= 1 << 1;
>                                          ^
> i
> 
> >  
> >  	return ctx;
> >  
> > @@ -396,7 +398,7 @@ static int do_switch(struct i915_hw_context *to)
> >  	struct intel_ring_buffer *ring = to->ring;
> >  	struct i915_hw_context *from = ring->last_context;
> >  	u32 hw_flags = 0;
> > -	int ret;
> > +	int ret, i;
> >  
> >  	BUG_ON(from != NULL && from->obj != NULL && from->obj->pin_count == 0);
> >  
> > @@ -432,6 +434,17 @@ static int do_switch(struct i915_hw_context *to)
> >  		return ret;
> >  	}
> >  
> > +	for (i = 0; i < MAX_L3_SLICES; i++) {
> > +		if (!(to->remap_slice & (1<<i)))
> > +			continue;
> > +
> > +		ret = i915_gem_l3_remap(ring, i);
> > +		if (!ret) {
> > +			to->remap_slice &= ~(1<<i);
> > +		}
> > +		/* If it failed, try again next round */
> > +	}
> > +
> >  	/* The backing object for the context is done after switching to the
> >  	 * *next* context. Therefore we cannot retire the previous context until
> >  	 * the next context has already started running. In fact, the below code
> > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> > index 65a7274..f0d7e1c 100644
> > --- a/drivers/gpu/drm/i915/i915_sysfs.c
> > +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> > @@ -118,9 +118,8 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
> >  	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
> >  	struct drm_device *drm_dev = dminor->dev;
> >  	struct drm_i915_private *dev_priv = drm_dev->dev_private;
> > -	uint32_t misccpctl;
> >  	int slice = (int)(uintptr_t)attr->private;
> > -	int i, ret;
> > +	int ret;
> >  
> >  	count = round_down(count, 4);
> >  
> > @@ -134,31 +133,16 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
> >  	if (ret)
> >  		return ret;
> >  
> > -	if (IS_HASWELL(drm_dev)) {
> > -		int last = min_t(int, GEN7_L3LOG_SIZE, count + offset);
> > -		if ((!dev_priv->l3_parity.remap_info[slice]))
> > -			memset(buf + offset, 0, last - offset);
> > -		else
> > -			memcpy(buf + offset,
> > -			       dev_priv->l3_parity.remap_info[slice] + (offset/4),
> > -			       last - offset);
> > -
> > -		i = last;
> > -		goto out;
> > -	}
> > -
> > -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > -
> > -	for (i = 0; i < count; i += 4)
> > -		*((uint32_t *)(&buf[i])) = I915_READ(GEN7_L3LOG_BASE + offset + i);
> > -
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +	if ((!dev_priv->l3_parity.remap_info[slice]))
> > +		memset(buf + offset, 0, count);
> > +	else
> > +		memcpy(buf + offset,
> > +		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
> > +		       count);
> 
> Needs fixing after patch 4 is fixed.
> 
> >  
> > -out:
> >  	mutex_unlock(&drm_dev->struct_mutex);
> >  
> > -	return i;
> > +	return count;
> >  }
> >  
> >  static ssize_t
> > @@ -170,6 +154,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
> >  	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
> >  	struct drm_device *drm_dev = dminor->dev;
> >  	struct drm_i915_private *dev_priv = drm_dev->dev_private;
> > +	struct i915_hw_context *ctx;
> >  	u32 *temp = NULL; /* Just here to make handling failures easy */
> >  	int slice = (int)(uintptr_t)attr->private;
> >  	int ret;
> > @@ -206,8 +191,9 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
> >  
> >  	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
> >  
> > -	if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
> > -		count = 0;
> > +	/* NB: We defer the remapping until we switch to the context */
> > +	list_for_each_entry(ctx, &dev_priv->context_list, link)
> > +		ctx->remap_slice |= (1<<slice);
> 
> Should we force a context switch on the next batch if the current
> context has remap_slice != 0?
> 

I think that's a good idea as long as you're willing to review the added
complexity.

> Also what happens if hw_contexts_disabled==false?
> 

Well if I had gotten my way and we'd merged full PPGTT already,
hw_contexts couldn't be disabled; but alas it is a valid concern.

I think punting is the right thing to do here. Mesa won't run if
contexts are disabled. However an error message, and return value would
be appropriate. What do you think?
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ada0950..80bed69 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -145,6 +145,13 @@  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (%s)", obj->ring->name);
 }
 
+static void describe_ctx(struct seq_file *m, struct i915_hw_context *ctx)
+{
+	seq_putc(m, ctx->is_initialized ? 'I' : 'i');
+	seq_putc(m, ctx->remap_slice ? 'R' : 'r');
+	seq_putc(m, ' ');
+}
+
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -1463,6 +1470,7 @@  static int i915_context_status(struct seq_file *m, void *unused)
 
 	list_for_each_entry(ctx, &dev_priv->context_list, link) {
 		seq_puts(m, "HW context ");
+		describe_ctx(m, ctx);
 		for_each_ring(ring, dev_priv, i)
 			if (ring->default_context == ctx)
 				seq_printf(m, "(default context %s) ", ring->name);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 68f992b..6ba78cd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -602,6 +602,7 @@  struct i915_hw_context {
 	struct kref ref;
 	int id;
 	bool is_initialized;
+	uint8_t remap_slice;
 	struct drm_i915_file_private *file_priv;
 	struct intel_ring_buffer *ring;
 	struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2bbdce8..72b7202 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -140,7 +140,7 @@  create_hw_context(struct drm_device *dev,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_hw_context *ctx;
-	int ret;
+	int ret, i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx == NULL)
@@ -181,6 +181,8 @@  create_hw_context(struct drm_device *dev,
 
 	ctx->file_priv = file_priv;
 	ctx->id = ret;
+	for (i = 0; i < NUM_L3_SLICES(dev); i++)
+		ctx->remap_slice |= 1 << 1;
 
 	return ctx;
 
@@ -396,7 +398,7 @@  static int do_switch(struct i915_hw_context *to)
 	struct intel_ring_buffer *ring = to->ring;
 	struct i915_hw_context *from = ring->last_context;
 	u32 hw_flags = 0;
-	int ret;
+	int ret, i;
 
 	BUG_ON(from != NULL && from->obj != NULL && from->obj->pin_count == 0);
 
@@ -432,6 +434,17 @@  static int do_switch(struct i915_hw_context *to)
 		return ret;
 	}
 
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(to->remap_slice & (1<<i)))
+			continue;
+
+		ret = i915_gem_l3_remap(ring, i);
+		if (!ret) {
+			to->remap_slice &= ~(1<<i);
+		}
+		/* If it failed, try again next round */
+	}
+
 	/* The backing object for the context is done after switching to the
 	 * *next* context. Therefore we cannot retire the previous context until
 	 * the next context has already started running. In fact, the below code
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 65a7274..f0d7e1c 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -118,9 +118,8 @@  i915_l3_read(struct file *filp, struct kobject *kobj,
 	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
-	uint32_t misccpctl;
 	int slice = (int)(uintptr_t)attr->private;
-	int i, ret;
+	int ret;
 
 	count = round_down(count, 4);
 
@@ -134,31 +133,16 @@  i915_l3_read(struct file *filp, struct kobject *kobj,
 	if (ret)
 		return ret;
 
-	if (IS_HASWELL(drm_dev)) {
-		int last = min_t(int, GEN7_L3LOG_SIZE, count + offset);
-		if ((!dev_priv->l3_parity.remap_info[slice]))
-			memset(buf + offset, 0, last - offset);
-		else
-			memcpy(buf + offset,
-			       dev_priv->l3_parity.remap_info[slice] + (offset/4),
-			       last - offset);
-
-		i = last;
-		goto out;
-	}
-
-	misccpctl = I915_READ(GEN7_MISCCPCTL);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-
-	for (i = 0; i < count; i += 4)
-		*((uint32_t *)(&buf[i])) = I915_READ(GEN7_L3LOG_BASE + offset + i);
-
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+	if ((!dev_priv->l3_parity.remap_info[slice]))
+		memset(buf + offset, 0, count);
+	else
+		memcpy(buf + offset,
+		       dev_priv->l3_parity.remap_info[slice] + (offset/4),
+		       count);
 
-out:
 	mutex_unlock(&drm_dev->struct_mutex);
 
-	return i;
+	return count;
 }
 
 static ssize_t
@@ -170,6 +154,7 @@  i915_l3_write(struct file *filp, struct kobject *kobj,
 	struct drm_minor *dminor = container_of(dev, struct drm_minor, kdev);
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
+	struct i915_hw_context *ctx;
 	u32 *temp = NULL; /* Just here to make handling failures easy */
 	int slice = (int)(uintptr_t)attr->private;
 	int ret;
@@ -206,8 +191,9 @@  i915_l3_write(struct file *filp, struct kobject *kobj,
 
 	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
 
-	if (i915_gem_l3_remap(&dev_priv->ring[RCS], slice))
-		count = 0;
+	/* NB: We defer the remapping until we switch to the context */
+	list_for_each_entry(ctx, &dev_priv->context_list, link)
+		ctx->remap_slice |= (1<<slice);
 
 	mutex_unlock(&drm_dev->struct_mutex);