diff mbox

[v5] drm/i915: Tune down init error message due to failure injection

Message ID 1458283837-11936-1-git-send-email-imre.deak@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Imre Deak March 18, 2016, 6:50 a.m. UTC
Atm, in case failure injection forces an error the subsequent "*ERROR*
failed to init modeset" error message will make automated tests (CI)
report this event as a breakage even though the event is expected. To
fix this print the error message with debug log level in this case.

While at it print the error message for any init failure and change it
to
"""
Device initialization failed (errno)
Please file a bug at https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
against DRM/Intel providing the dmesg log by booting with drm.debug=0xf
"""
and export a helper printing error messages using this same format.
A follow-up patch will convert all uses of DRM_ERROR reporting a user
facing problem to use this new helper instead.

v2:
- Include the problematic error message in the commit log, add a
  request to file an fdo bug to the message (Chris)
v3:
- Include the new error message too in the commit log, make the
  fdo link more precise and print part of the message with info log
  level (Chris)
v4: (Chris)
- Use dev_printk instead of DRM_ERROR/INFO and use NOTICE instead of
  INFO loglevel
- Export a helper for printing user facing error messages
v5:
- Keep the DRM_ERROR message prefix used by piglit-igt/CI to filter
  relevant dmesg lines
- Use dev_notice(), instead of dev_printk(KERN_NOTICE,...)

CC: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c | 48 ++++++++++++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_drv.h |  7 ++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

Comments

Chris Wilson March 18, 2016, 7:58 a.m. UTC | #1
On Fri, Mar 18, 2016 at 08:50:37AM +0200, Imre Deak wrote:
> Atm, in case failure injection forces an error the subsequent "*ERROR*
> failed to init modeset" error message will make automated tests (CI)
> report this event as a breakage even though the event is expected. To
> fix this print the error message with debug log level in this case.
> 
> While at it print the error message for any init failure and change it
> to
> """
> Device initialization failed (errno)
> Please file a bug at https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
> against DRM/Intel providing the dmesg log by booting with drm.debug=0xf
> """
> and export a helper printing error messages using this same format.
> A follow-up patch will convert all uses of DRM_ERROR reporting a user
> facing problem to use this new helper instead.
> 
> v2:
> - Include the problematic error message in the commit log, add a
>   request to file an fdo bug to the message (Chris)
> v3:
> - Include the new error message too in the commit log, make the
>   fdo link more precise and print part of the message with info log
>   level (Chris)
> v4: (Chris)
> - Use dev_printk instead of DRM_ERROR/INFO and use NOTICE instead of
>   INFO loglevel
> - Export a helper for printing user facing error messages
> v5:
> - Keep the DRM_ERROR message prefix used by piglit-igt/CI to filter
>   relevant dmesg lines
> - Use dev_notice(), instead of dev_printk(KERN_NOTICE,...)
> 
> CC: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_dma.c | 48 ++++++++++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_drv.h |  7 ++++++
>  2 files changed, 50 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 68592b0..096dc35 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -66,6 +66,44 @@ bool __i915_inject_load_failure(const char *func, int line)
>  	return false;
>  }
>  
> +#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI"
> +#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \
> +		    "providing the dmesg log by booting with drm.debug=0xf"
> +
> +void
> +__i915_printk(struct drm_i915_private *dev_priv, const char *level,
> +	      const char *fmt, ...)
> +{

static int shown_bug_once;

> +	struct device *dev = dev_priv->dev->dev;
> +	bool is_error = level[1] <= KERN_ERR[1];
> +	struct va_format vaf;
> +	va_list args;
> +
> +	va_start(args, fmt);
> +
> +	vaf.fmt = fmt;
> +	vaf.va = &args;
> +
> +	dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
> +		   __builtin_return_address(0), &vaf);

Also on my wishlist is to
#undef DRM_NAME
#define DRM_NAME "drm/i915"
in i915_drv.h

Don't tell me that would break CI!

> +	if (is_error)

if (is_error && !shown_bug_once) {
> +		dev_notice(dev, "%s", FDO_BUG_MSG);
shown_bug_once = 1;
}

With just showing the bug once,
Reviwed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
Imre Deak March 18, 2016, 8:16 a.m. UTC | #2
On Fri, 2016-03-18 at 07:58 +0000, Chris Wilson wrote:
> On Fri, Mar 18, 2016 at 08:50:37AM +0200, Imre Deak wrote:
> > Atm, in case failure injection forces an error the subsequent "*ERROR*
> > failed to init modeset" error message will make automated tests (CI)
> > report this event as a breakage even though the event is expected. To
> > fix this print the error message with debug log level in this case.
> > 
> > While at it print the error message for any init failure and change it
> > to
> > """
> > Device initialization failed (errno)
> > Please file a bug at https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
> > against DRM/Intel providing the dmesg log by booting with drm.debug=0xf
> > """
> > and export a helper printing error messages using this same format.
> > A follow-up patch will convert all uses of DRM_ERROR reporting a user
> > facing problem to use this new helper instead.
> > 
> > v2:
> > - Include the problematic error message in the commit log, add a
> >   request to file an fdo bug to the message (Chris)
> > v3:
> > - Include the new error message too in the commit log, make the
> >   fdo link more precise and print part of the message with info log
> >   level (Chris)
> > v4: (Chris)
> > - Use dev_printk instead of DRM_ERROR/INFO and use NOTICE instead of
> >   INFO loglevel
> > - Export a helper for printing user facing error messages
> > v5:
> > - Keep the DRM_ERROR message prefix used by piglit-igt/CI to filter
> >   relevant dmesg lines
> > - Use dev_notice(), instead of dev_printk(KERN_NOTICE,...)
> > 
> > CC: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_dma.c | 48 ++++++++++++++++++++++++++++++++++++-----
> >  drivers/gpu/drm/i915/i915_drv.h |  7 ++++++
> >  2 files changed, 50 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 68592b0..096dc35 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -66,6 +66,44 @@ bool __i915_inject_load_failure(const char *func, int line)
> >  	return false;
> >  }
> >  
> > +#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI"
> > +#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \
> > +		    "providing the dmesg log by booting with drm.debug=0xf"
> > +
> > +void
> > +__i915_printk(struct drm_i915_private *dev_priv, const char *level,
> > +	      const char *fmt, ...)
> > +{
> 
> static int shown_bug_once;
> 
> > +	struct device *dev = dev_priv->dev->dev;
> > +	bool is_error = level[1] <= KERN_ERR[1];
> > +	struct va_format vaf;
> > +	va_list args;
> > +
> > +	va_start(args, fmt);
> > +
> > +	vaf.fmt = fmt;
> > +	vaf.va = &args;
> > +
> > +	dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
> > +		   __builtin_return_address(0), &vaf);
> 
> Also on my wishlist is to
> #undef DRM_NAME
> #define DRM_NAME "drm/i915"
> in i915_drv.h
> 
> Don't tell me that would break CI!

The current regex it uses on all messages with a loglevel <=
KERN_NOTICE:
(\[drm:|drm_|intel_|i915_)

Probably could be changed to 'i915 0000:00:02.0:' if we converted all
user facing DRM_ERRORs to use the new helper. Btw, according to your
plan the rest non-user facing DRM_ERRORs would be printed with debug
log level I assume, so isn't it a problem that they won't be treated as
test failures by CI?

> > +	if (is_error)
> 
> if (is_error && !shown_bug_once) {
> > +		dev_notice(dev, "%s", FDO_BUG_MSG);
> shown_bug_once = 1;
> }

Ok, will respin it.

--Imre

> 
> With just showing the bug once,
> Reviwed-by: Chris Wilson <chris@chris-wilson.co.uk>
> -Chris
>
Chris Wilson March 18, 2016, 8:23 a.m. UTC | #3
On Fri, Mar 18, 2016 at 10:16:28AM +0200, Imre Deak wrote:
> On Fri, 2016-03-18 at 07:58 +0000, Chris Wilson wrote:
> > On Fri, Mar 18, 2016 at 08:50:37AM +0200, Imre Deak wrote:
> > > Atm, in case failure injection forces an error the subsequent "*ERROR*
> > > failed to init modeset" error message will make automated tests (CI)
> > > report this event as a breakage even though the event is expected. To
> > > fix this print the error message with debug log level in this case.
> > > 
> > > While at it print the error message for any init failure and change it
> > > to
> > > """
> > > Device initialization failed (errno)
> > > Please file a bug at https://bugs.freedesktop.org/enter_bug.cgi?product=DRI
> > > against DRM/Intel providing the dmesg log by booting with drm.debug=0xf
> > > """
> > > and export a helper printing error messages using this same format.
> > > A follow-up patch will convert all uses of DRM_ERROR reporting a user
> > > facing problem to use this new helper instead.
> > > 
> > > v2:
> > > - Include the problematic error message in the commit log, add a
> > >   request to file an fdo bug to the message (Chris)
> > > v3:
> > > - Include the new error message too in the commit log, make the
> > >   fdo link more precise and print part of the message with info log
> > >   level (Chris)
> > > v4: (Chris)
> > > - Use dev_printk instead of DRM_ERROR/INFO and use NOTICE instead of
> > >   INFO loglevel
> > > - Export a helper for printing user facing error messages
> > > v5:
> > > - Keep the DRM_ERROR message prefix used by piglit-igt/CI to filter
> > >   relevant dmesg lines
> > > - Use dev_notice(), instead of dev_printk(KERN_NOTICE,...)
> > > 
> > > CC: Chris Wilson <chris@chris-wilson.co.uk>
> > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_dma.c | 48 ++++++++++++++++++++++++++++++++++++-----
> > >  drivers/gpu/drm/i915/i915_drv.h |  7 ++++++
> > >  2 files changed, 50 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > > index 68592b0..096dc35 100644
> > > --- a/drivers/gpu/drm/i915/i915_dma.c
> > > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > > @@ -66,6 +66,44 @@ bool __i915_inject_load_failure(const char *func, int line)
> > >  	return false;
> > >  }
> > >  
> > > +#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI"
> > > +#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \
> > > +		    "providing the dmesg log by booting with drm.debug=0xf"
> > > +
> > > +void
> > > +__i915_printk(struct drm_i915_private *dev_priv, const char *level,
> > > +	      const char *fmt, ...)
> > > +{
> > 
> > static int shown_bug_once;
> > 
> > > +	struct device *dev = dev_priv->dev->dev;
> > > +	bool is_error = level[1] <= KERN_ERR[1];
> > > +	struct va_format vaf;
> > > +	va_list args;
> > > +
> > > +	va_start(args, fmt);
> > > +
> > > +	vaf.fmt = fmt;
> > > +	vaf.va = &args;
> > > +
> > > +	dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
> > > +		   __builtin_return_address(0), &vaf);
> > 
> > Also on my wishlist is to
> > #undef DRM_NAME
> > #define DRM_NAME "drm/i915"
> > in i915_drv.h
> > 
> > Don't tell me that would break CI!
> 
> The current regex it uses on all messages with a loglevel <=
> KERN_NOTICE:
> (\[drm:|drm_|intel_|i915_)
> 
> Probably could be changed to 'i915 0000:00:02.0:' if we converted all
> user facing DRM_ERRORs to use the new helper. Btw, according to your
> plan the rest non-user facing DRM_ERRORs would be printed with debug
> log level I assume, so isn't it a problem that they won't be treated as
> test failures by CI?

If a DRM_ERROR didn't result in a user visible error then it wasn't much
of an error and CI is only complaining because we didn't keep dmesg
clean - i.e. the only bug there would be using DRM_ERROR as a debugging
aide, so yes I'm okay with that. (And if the DRM_ERROR did result in a
user visible error and the only reason CI was complaining was because of
the dmesg spam, then our testing is snafu.)
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 68592b0..096dc35 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -66,6 +66,44 @@  bool __i915_inject_load_failure(const char *func, int line)
 	return false;
 }
 
+#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI"
+#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \
+		    "providing the dmesg log by booting with drm.debug=0xf"
+
+void
+__i915_printk(struct drm_i915_private *dev_priv, const char *level,
+	      const char *fmt, ...)
+{
+	struct device *dev = dev_priv->dev->dev;
+	bool is_error = level[1] <= KERN_ERR[1];
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	dev_printk(level, dev, "[" DRM_NAME ":%ps] %pV",
+		   __builtin_return_address(0), &vaf);
+
+	if (is_error)
+		dev_notice(dev, "%s", FDO_BUG_MSG);
+
+	va_end(args);
+}
+
+static bool i915_error_injected(struct drm_i915_private *dev_priv)
+{
+	return i915.inject_load_failure &&
+	       i915_load_fail_count == i915.inject_load_failure;
+}
+
+#define i915_load_error(dev_priv, fmt, ...)				     \
+	__i915_printk(dev_priv,						     \
+		      i915_error_injected(dev_priv) ? KERN_DEBUG : KERN_ERR, \
+		      fmt, ##__VA_ARGS__)
+
 static int i915_getparam(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv)
 {
@@ -972,8 +1010,6 @@  static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	if (i915_inject_load_failure())
 		return -ENODEV;
 
-	dev_priv->dev = dev;
-
 	/* Setup the write-once "constant" device info */
 	device_info = (struct intel_device_info *)&dev_priv->info;
 	memcpy(device_info, info, sizeof(dev_priv->info));
@@ -1303,6 +1339,8 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		return -ENOMEM;
 
 	dev->dev_private = dev_priv;
+	/* Must be set before calling __i915_printk */
+	dev_priv->dev = dev;
 
 	ret = i915_driver_init_early(dev_priv, dev,
 				     (struct intel_device_info *)flags);
@@ -1332,10 +1370,8 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	}
 
 	ret = i915_load_modeset_init(dev);
-	if (ret < 0) {
-		DRM_ERROR("failed to init modeset\n");
+	if (ret < 0)
 		goto out_cleanup_vblank;
-	}
 
 	i915_driver_register(dev_priv);
 
@@ -1357,6 +1393,8 @@  out_runtime_pm_put:
 out_free_priv:
 	kfree(dev_priv);
 
+	i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 00c41a4..e3b019a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2696,6 +2696,13 @@  extern int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state);
 extern int i915_resume_switcheroo(struct drm_device *dev);
 
 /* i915_dma.c */
+void __printf(3, 4)
+__i915_printk(struct drm_i915_private *dev_priv, const char *level,
+	      const char *fmt, ...);
+
+#define i915_report_error(dev_priv, fmt, ...)				   \
+	__i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__)
+
 extern int i915_driver_load(struct drm_device *, unsigned long flags);
 extern int i915_driver_unload(struct drm_device *);
 extern int i915_driver_open(struct drm_device *dev, struct drm_file *file);