diff mbox

drm/i915: Debugfs to disable context banning

Message ID 20171017152533.1865-1-jeff.mcgee@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

jeff.mcgee@intel.com Oct. 17, 2017, 3:25 p.m. UTC
From: Jeff McGee <jeff.mcgee@intel.com>

Useful for stress testing various reset scenarios. The ioctl that we
have for specific client/context banning disable is difficult to utilize
outside of unit testing.

Signed-off-by: Jeff McGee <jeff.mcgee@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h     |  2 ++
 drivers/gpu/drm/i915/i915_gem.c     |  3 ++-
 3 files changed, 29 insertions(+), 1 deletion(-)

Comments

Daniel Vetter Oct. 17, 2017, 3:36 p.m. UTC | #1
On Tue, Oct 17, 2017 at 08:25:33AM -0700, jeff.mcgee@intel.com wrote:
> From: Jeff McGee <jeff.mcgee@intel.com>
> 
> Useful for stress testing various reset scenarios. The ioctl that we
> have for specific client/context banning disable is difficult to utilize
> outside of unit testing.

Do we have these stress tests available somewhere?
-Daniel

> 
> Signed-off-by: Jeff McGee <jeff.mcgee@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 25 +++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h     |  2 ++
>  drivers/gpu/drm/i915/i915_gem.c     |  3 ++-
>  3 files changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0bb6e01121fc..17e6b388b80c 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4408,6 +4408,30 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
>  			i915_min_freq_get, i915_min_freq_set,
>  			"%llu\n");
>  
> +static int i915_banning_disable_get(void *data, u64 *val)
> +{
> +	struct drm_device *dev = data;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	*val = dev_priv->gpu_error.banning_disabled;
> +
> +	return 0;
> +}
> +
> +static int i915_banning_disable_set(void *data, u64 val)
> +{
> +	struct drm_device *dev = data;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	dev_priv->gpu_error.banning_disabled = (bool)val;
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(i915_banning_disable_fops,
> +			i915_banning_disable_get, i915_banning_disable_set,
> +			"%llu\n");
> +
>  static int
>  i915_cache_sharing_get(void *data, u64 *val)
>  {
> @@ -4829,6 +4853,7 @@ static const struct i915_debugfs_files {
>  	{"i915_guc_log_control", &i915_guc_log_control_fops},
>  	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
>  	{"i915_ipc_status", &i915_ipc_status_fops}
> +	{"i915_banning_disable", &i915_banning_disable_fops},
>  };
>  
>  int i915_debugfs_register(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c7b2ca6aff05..da65b6f8cc6f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1667,6 +1667,8 @@ struct i915_gpu_error {
>  
>  	/* For missed irq/seqno simulation. */
>  	unsigned long test_irq_rings;
> +
> +	bool banning_disabled;
>  };
>  
>  enum modeset_restore {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 20fcac37c85a..e0cb53cafc66 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2770,7 +2770,8 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
>  static bool ban_context(const struct i915_gem_context *ctx,
>  			unsigned int score)
>  {
> -	return (i915_gem_context_is_bannable(ctx) &&
> +	return (!ctx->i915->gpu_error.banning_disabled &&
> +		i915_gem_context_is_bannable(ctx) &&
>  		score >= CONTEXT_SCORE_BAN_THRESHOLD);
>  }
>  
> -- 
> 2.14.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
jeff.mcgee@intel.com Oct. 17, 2017, 4:23 p.m. UTC | #2
On Tue, Oct 17, 2017 at 05:36:54PM +0200, Daniel Vetter wrote:
> On Tue, Oct 17, 2017 at 08:25:33AM -0700, jeff.mcgee@intel.com wrote:
> > From: Jeff McGee <jeff.mcgee@intel.com>
> > 
> > Useful for stress testing various reset scenarios. The ioctl that we
> > have for specific client/context banning disable is difficult to utilize
> > outside of unit testing.
> 
> Do we have these stress tests available somewhere?
> -Daniel
> 
Potentially any test scenario that relies upon app/umd instead of IGT to
inject batches that need to be reset would benefit from the ability to
disable context banning. We are using such scenarios to validate for the
Yocto APL project, specifically for a feature where preemption of non-
preemptible batches is forced using reset. It is not needed for IGT-based
reset stress tests.
-Jeff

> > 
> > Signed-off-by: Jeff McGee <jeff.mcgee@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c | 25 +++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_drv.h     |  2 ++
> >  drivers/gpu/drm/i915/i915_gem.c     |  3 ++-
> >  3 files changed, 29 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 0bb6e01121fc..17e6b388b80c 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -4408,6 +4408,30 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
> >  			i915_min_freq_get, i915_min_freq_set,
> >  			"%llu\n");
> >  
> > +static int i915_banning_disable_get(void *data, u64 *val)
> > +{
> > +	struct drm_device *dev = data;
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +
> > +	*val = dev_priv->gpu_error.banning_disabled;
> > +
> > +	return 0;
> > +}
> > +
> > +static int i915_banning_disable_set(void *data, u64 val)
> > +{
> > +	struct drm_device *dev = data;
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +
> > +	dev_priv->gpu_error.banning_disabled = (bool)val;
> > +
> > +	return 0;
> > +}
> > +
> > +DEFINE_SIMPLE_ATTRIBUTE(i915_banning_disable_fops,
> > +			i915_banning_disable_get, i915_banning_disable_set,
> > +			"%llu\n");
> > +
> >  static int
> >  i915_cache_sharing_get(void *data, u64 *val)
> >  {
> > @@ -4829,6 +4853,7 @@ static const struct i915_debugfs_files {
> >  	{"i915_guc_log_control", &i915_guc_log_control_fops},
> >  	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
> >  	{"i915_ipc_status", &i915_ipc_status_fops}
> > +	{"i915_banning_disable", &i915_banning_disable_fops},
> >  };
> >  
> >  int i915_debugfs_register(struct drm_i915_private *dev_priv)
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index c7b2ca6aff05..da65b6f8cc6f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1667,6 +1667,8 @@ struct i915_gpu_error {
> >  
> >  	/* For missed irq/seqno simulation. */
> >  	unsigned long test_irq_rings;
> > +
> > +	bool banning_disabled;
> >  };
> >  
> >  enum modeset_restore {
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 20fcac37c85a..e0cb53cafc66 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2770,7 +2770,8 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
> >  static bool ban_context(const struct i915_gem_context *ctx,
> >  			unsigned int score)
> >  {
> > -	return (i915_gem_context_is_bannable(ctx) &&
> > +	return (!ctx->i915->gpu_error.banning_disabled &&
> > +		i915_gem_context_is_bannable(ctx) &&
> >  		score >= CONTEXT_SCORE_BAN_THRESHOLD);
> >  }
> >  
> > -- 
> > 2.14.2
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Daniel Vetter Oct. 18, 2017, 9:01 a.m. UTC | #3
On Tue, Oct 17, 2017 at 09:23:43AM -0700, Jeff McGee wrote:
> On Tue, Oct 17, 2017 at 05:36:54PM +0200, Daniel Vetter wrote:
> > On Tue, Oct 17, 2017 at 08:25:33AM -0700, jeff.mcgee@intel.com wrote:
> > > From: Jeff McGee <jeff.mcgee@intel.com>
> > > 
> > > Useful for stress testing various reset scenarios. The ioctl that we
> > > have for specific client/context banning disable is difficult to utilize
> > > outside of unit testing.
> > 
> > Do we have these stress tests available somewhere?
> > -Daniel
> > 
> Potentially any test scenario that relies upon app/umd instead of IGT to
> inject batches that need to be reset would benefit from the ability to
> disable context banning. We are using such scenarios to validate for the
> Yocto APL project, specifically for a feature where preemption of non-
> preemptible batches is forced using reset. It is not needed for IGT-based
> reset stress tests.

Is that test suite public somewhere?
-Daniel
jeff.mcgee@intel.com Oct. 18, 2017, 2:22 p.m. UTC | #4
On Wed, Oct 18, 2017 at 11:01:13AM +0200, Daniel Vetter wrote:
> On Tue, Oct 17, 2017 at 09:23:43AM -0700, Jeff McGee wrote:
> > On Tue, Oct 17, 2017 at 05:36:54PM +0200, Daniel Vetter wrote:
> > > On Tue, Oct 17, 2017 at 08:25:33AM -0700, jeff.mcgee@intel.com wrote:
> > > > From: Jeff McGee <jeff.mcgee@intel.com>
> > > > 
> > > > Useful for stress testing various reset scenarios. The ioctl that we
> > > > have for specific client/context banning disable is difficult to utilize
> > > > outside of unit testing.
> > > 
> > > Do we have these stress tests available somewhere?
> > > -Daniel
> > > 
> > Potentially any test scenario that relies upon app/umd instead of IGT to
> > inject batches that need to be reset would benefit from the ability to
> > disable context banning. We are using such scenarios to validate for the
> > Yocto APL project, specifically for a feature where preemption of non-
> > preemptible batches is forced using reset. It is not needed for IGT-based
> > reset stress tests.
> 
> Is that test suite public somewhere?
> -Daniel

No, but it could be. I don't own it. Anyway, I'll reintroduce this patch
when I have a better case to make for it. It was worth a try ;)
-Jeff
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0bb6e01121fc..17e6b388b80c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4408,6 +4408,30 @@  DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
 			i915_min_freq_get, i915_min_freq_set,
 			"%llu\n");
 
+static int i915_banning_disable_get(void *data, u64 *val)
+{
+	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	*val = dev_priv->gpu_error.banning_disabled;
+
+	return 0;
+}
+
+static int i915_banning_disable_set(void *data, u64 val)
+{
+	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->gpu_error.banning_disabled = (bool)val;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_banning_disable_fops,
+			i915_banning_disable_get, i915_banning_disable_set,
+			"%llu\n");
+
 static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
@@ -4829,6 +4853,7 @@  static const struct i915_debugfs_files {
 	{"i915_guc_log_control", &i915_guc_log_control_fops},
 	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
 	{"i915_ipc_status", &i915_ipc_status_fops}
+	{"i915_banning_disable", &i915_banning_disable_fops},
 };
 
 int i915_debugfs_register(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c7b2ca6aff05..da65b6f8cc6f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1667,6 +1667,8 @@  struct i915_gpu_error {
 
 	/* For missed irq/seqno simulation. */
 	unsigned long test_irq_rings;
+
+	bool banning_disabled;
 };
 
 enum modeset_restore {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 20fcac37c85a..e0cb53cafc66 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2770,7 +2770,8 @@  i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 static bool ban_context(const struct i915_gem_context *ctx,
 			unsigned int score)
 {
-	return (i915_gem_context_is_bannable(ctx) &&
+	return (!ctx->i915->gpu_error.banning_disabled &&
+		i915_gem_context_is_bannable(ctx) &&
 		score >= CONTEXT_SCORE_BAN_THRESHOLD);
 }