[RFC,i-g-t,v2,1/8] tests/core_hotunplug: Duplicate debug messages in dmesg
diff mbox series

Message ID 20200622164415.30352-2-janusz.krzysztofik@linux.intel.com
State New
Headers show
Series
  • tests/core_hotunplug: New subtests and enhancements
Related show

Commit Message

Janusz Krzysztofik June 22, 2020, 4:44 p.m. UTC
The purpose of debug messages displayed by the test is to make
identification of a subtest phase that fails more easy.  Since issues
exhibited by the test are mostly reported to dmesg, print those debug
messages to /dev/kmsg as well.

v2: rebase on upstream

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
---
 tests/core_hotunplug.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

Comments

Michał Winiarski June 25, 2020, 3:27 p.m. UTC | #1
Quoting Janusz Krzysztofik (2020-06-22 18:44:08)
> The purpose of debug messages displayed by the test is to make
> identification of a subtest phase that fails more easy.  Since issues
> exhibited by the test are mostly reported to dmesg, print those debug
> messages to /dev/kmsg as well.

I'm not a fan of spamming dmesg from IGT and I'd prefer if you add this logging
to the kernel, but let's go over this case-by-case.

> v2: rebase on upstream
> 
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> ---
>  tests/core_hotunplug.c | 38 ++++++++++++++++++++++----------------
>  1 file changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
> index e03f3b945..826645b1f 100644
> --- a/tests/core_hotunplug.c
> +++ b/tests/core_hotunplug.c
> @@ -49,6 +49,12 @@ struct hotunplug {
>  
>  /* Helpers */
>  
> +#define local_debug(msg...)                                                 \
> +({                                                                          \
> +       igt_debug("%s: %s\n", __func__, msg);                                \
> +       igt_kmsg(KMSG_DEBUG "%s: %s: %s\n", igt_test_name(), __func__, msg); \
> +})
> +
>  static void prepare_for_unbind(struct hotunplug *priv, char *buf, int buflen)
>  {
>         int len;
> @@ -68,9 +74,9 @@ static void prepare_for_unbind(struct hotunplug *priv, char *buf, int buflen)
>         close(priv->fd.sysfs_dev);
>  }
>  
> -static void prepare(struct hotunplug *priv, char *buf, int buflen)
> +static inline void prepare(struct hotunplug *priv, char *buf, int buflen)
>  {
> -       igt_debug("opening device\n");
> +       local_debug("opening device");

[  220.458370] [drm:drm_open] pid = 194, minor = 128
[  220.460062] [drm:i915_gem_open [i915]]

>         priv->fd.drm = __drm_open_driver(DRIVER_ANY);
>         igt_assert(priv->fd.drm >= 0);
>  
> @@ -137,14 +143,14 @@ static void bus_rescan(int fd_sysfs_bus)
>         close(fd_sysfs_bus);
>  }
>  
> -static void healthcheck(void)
> +static inline void healthcheck(void)
>  {
>         int fd_drm;
>  
>         /* device name may have changed, rebuild IGT device list */
>         igt_devices_scan(true);
>  
> -       igt_debug("reopening the device\n");
> +       local_debug("reopening the device");

Well, this is going to look the same as open, except closing it won't print
drm_lastclose.

[  293.957567] [drm:drm_release] open_count = 2
[  293.958805] [drm:drm_file_free.part.0] pid = 194, device = 0xe280, open_count = 2

>         fd_drm = __drm_open_driver(DRIVER_ANY);
>         igt_abort_on_f(fd_drm < 0, "Device reopen failure");
>  
> @@ -181,13 +187,13 @@ static void unbind_rebind(void)
>  
>         prepare(&priv, buf, sizeof(buf));
>  
> -       igt_debug("closing the device\n");
> +       local_debug("closing the device");

[  250.157568] [drm:drm_release] open_count = 1
[  250.158807] [drm:drm_file_free.part.0] pid = 194, device = 0xe280, open_count = 1
[  250.161183] [drm:drm_lastclose]
[  250.162312] [drm:drm_lastclose] driver lastclose completed

>         close(priv.fd.drm);
>  
> -       igt_debug("unbinding the driver from the device\n");
> +       local_debug("unbinding the driver from the device");
>         driver_unbind(priv.fd.sysfs_drv, priv.dev_bus_addr);

[ 1553.868235] bus: 'event_source': remove device i915_0000_00_02.0

>  
> -       igt_debug("rebinding the driver to the device\n");
> +       local_debug("rebinding the driver to the device");
>         driver_bind(priv.fd.sysfs_drv, priv.dev_bus_addr);

[ 1592.758219] bus: 'pci': driver_probe_device: matched device 0000:00:02.0 with driver i915
[ 1592.760543] bus: 'pci': really_probe: probing driver i915 with device 0000:00:02.0
(...bunch of i915 logs...)
[  203.961656] driver: 'i915': driver_bound: bound to device '0000:00:02.0'
[  203.966421] bus: 'pci': really_probe: bound device 0000:00:02.0 to driver i915

>  
>         healthcheck();
> @@ -199,13 +205,13 @@ static void unplug_rescan(void)
>  
>         prepare(&priv, NULL, 0);
>  
> -       igt_debug("closing the device\n");
> +       local_debug("closing the device");
>         close(priv.fd.drm);
>  
> -       igt_debug("unplugging the device\n");
> +       local_debug("unplugging the device");
>         device_unplug(priv.fd.sysfs_dev);

[   60.664163] bus: 'pci': remove device 0000:00:02.0

> -       igt_debug("recovering the device\n");
> +       local_debug("recovering the device");
>         bus_rescan(priv.fd.sysfs_bus);

[   97.384479] bus: 'pci': add device 0000:00:02.0

>  
>         healthcheck();
> @@ -218,13 +224,13 @@ static void hotunbind_lateclose(void)
>  
>         prepare(&priv, buf, sizeof(buf));
>  
> -       igt_debug("hot unbinding the driver from the device\n");
> +       local_debug("hot unbinding the driver from the device");
>         driver_unbind(priv.fd.sysfs_drv, priv.dev_bus_addr);
>  
> -       igt_debug("rebinding the driver to the device\n");
> +       local_debug("rebinding the driver to the device");
>         driver_bind(priv.fd.sysfs_drv, priv.dev_bus_addr);
>  
> -       igt_debug("late closing the unbound device instance\n");
> +       local_debug("late closing the unbound device instance");
>         close(priv.fd.drm);

Would it be possible to add extra logging allowing us to distinguish this from
regular unbind on i915 side?

>  
>         healthcheck();
> @@ -236,13 +242,13 @@ static void hotunplug_lateclose(void)
>  
>         prepare(&priv, NULL, 0);
>  
> -       igt_debug("hot unplugging the device\n");
> +       local_debug("hot unplugging the device");
>         device_unplug(priv.fd.sysfs_dev);
>  
> -       igt_debug("recovering the device\n");
> +       local_debug("recovering the device");
>         bus_rescan(priv.fd.sysfs_bus);
>  
> -       igt_debug("late closing the removed device instance\n");
> +       local_debug("late closing the removed device instance");
>         close(priv.fd.drm);

Same thing here.

So, not including the hot unplug/unbind, I think the logging is already there.

Also - note, the "driver core" logs are probably disabled on CI, but I still
think that figuring out how to enable those from IGT (and letting the kernel
just do its regular logging) rather than adding kmsg prints from userspace is a
better approach.

-Michał

>  
>         healthcheck();
> -- 
> 2.21.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Janusz Krzysztofik June 26, 2020, 10:18 a.m. UTC | #2
Hi Michał,

Thanks for review.

On Thu, 2020-06-25 at 17:27 +0200, Michał Winiarski wrote:
> Quoting Janusz Krzysztofik (2020-06-22 18:44:08)
> > The purpose of debug messages displayed by the test is to make
> > identification of a subtest phase that fails more easy.  Since issues
> > exhibited by the test are mostly reported to dmesg, print those debug
> > messages to /dev/kmsg as well.
> 
> I'm not a fan of spamming dmesg from IGT and I'd prefer if you add this logging
> to the kernel, 

The idea was to simply log IGT actions, not to log kernel reactions on
them which already happens.  Doing that from the kernel would probably
require modifications to PCI sysfs handling or to sysfs in general.

If you see no benefits from that, let's drop this patch.

Thanks,
Janusz 
 
> but let's go over this case-by-case.
> 
> > v2: rebase on upstream
> > 
> > Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
> > ---
> >  tests/core_hotunplug.c | 38 ++++++++++++++++++++++----------------
> >  1 file changed, 22 insertions(+), 16 deletions(-)
> > 
> > diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
> > index e03f3b945..826645b1f 100644
> > --- a/tests/core_hotunplug.c
> > +++ b/tests/core_hotunplug.c
> > @@ -49,6 +49,12 @@ struct hotunplug {
> >  
> >  /* Helpers */
> >  
> > +#define local_debug(msg...)                                                 \
> > +({                                                                          \
> > +       igt_debug("%s: %s\n", __func__, msg);                                \
> > +       igt_kmsg(KMSG_DEBUG "%s: %s: %s\n", igt_test_name(), __func__, msg); \
> > +})
> > +
> >  static void prepare_for_unbind(struct hotunplug *priv, char *buf, int buflen)
> >  {
> >         int len;
> > @@ -68,9 +74,9 @@ static void prepare_for_unbind(struct hotunplug *priv, char *buf, int buflen)
> >         close(priv->fd.sysfs_dev);
> >  }
> >  
> > -static void prepare(struct hotunplug *priv, char *buf, int buflen)
> > +static inline void prepare(struct hotunplug *priv, char *buf, int buflen)
> >  {
> > -       igt_debug("opening device\n");
> > +       local_debug("opening device");
> 
> [  220.458370] [drm:drm_open] pid = 194, minor = 128
> [  220.460062] [drm:i915_gem_open [i915]]
> 
> >         priv->fd.drm = __drm_open_driver(DRIVER_ANY);
> >         igt_assert(priv->fd.drm >= 0);
> >  
> > @@ -137,14 +143,14 @@ static void bus_rescan(int fd_sysfs_bus)
> >         close(fd_sysfs_bus);
> >  }
> >  
> > -static void healthcheck(void)
> > +static inline void healthcheck(void)
> >  {
> >         int fd_drm;
> >  
> >         /* device name may have changed, rebuild IGT device list */
> >         igt_devices_scan(true);
> >  
> > -       igt_debug("reopening the device\n");
> > +       local_debug("reopening the device");
> 
> Well, this is going to look the same as open, except closing it won't print
> drm_lastclose.
> 
> [  293.957567] [drm:drm_release] open_count = 2
> [  293.958805] [drm:drm_file_free.part.0] pid = 194, device = 0xe280, open_count = 2
> 
> >         fd_drm = __drm_open_driver(DRIVER_ANY);
> >         igt_abort_on_f(fd_drm < 0, "Device reopen failure");
> >  
> > @@ -181,13 +187,13 @@ static void unbind_rebind(void)
> >  
> >         prepare(&priv, buf, sizeof(buf));
> >  
> > -       igt_debug("closing the device\n");
> > +       local_debug("closing the device");
> 
> [  250.157568] [drm:drm_release] open_count = 1
> [  250.158807] [drm:drm_file_free.part.0] pid = 194, device = 0xe280, open_count = 1
> [  250.161183] [drm:drm_lastclose]
> [  250.162312] [drm:drm_lastclose] driver lastclose completed
> 
> >         close(priv.fd.drm);
> >  
> > -       igt_debug("unbinding the driver from the device\n");
> > +       local_debug("unbinding the driver from the device");
> >         driver_unbind(priv.fd.sysfs_drv, priv.dev_bus_addr);
> 
> [ 1553.868235] bus: 'event_source': remove device i915_0000_00_02.0
> 
> >  
> > -       igt_debug("rebinding the driver to the device\n");
> > +       local_debug("rebinding the driver to the device");
> >         driver_bind(priv.fd.sysfs_drv, priv.dev_bus_addr);
> 
> [ 1592.758219] bus: 'pci': driver_probe_device: matched device 0000:00:02.0 with driver i915
> [ 1592.760543] bus: 'pci': really_probe: probing driver i915 with device 0000:00:02.0
> (...bunch of i915 logs...)
> [  203.961656] driver: 'i915': driver_bound: bound to device '0000:00:02.0'
> [  203.966421] bus: 'pci': really_probe: bound device 0000:00:02.0 to driver i915
> 
> >  
> >         healthcheck();
> > @@ -199,13 +205,13 @@ static void unplug_rescan(void)
> >  
> >         prepare(&priv, NULL, 0);
> >  
> > -       igt_debug("closing the device\n");
> > +       local_debug("closing the device");
> >         close(priv.fd.drm);
> >  
> > -       igt_debug("unplugging the device\n");
> > +       local_debug("unplugging the device");
> >         device_unplug(priv.fd.sysfs_dev);
> 
> [   60.664163] bus: 'pci': remove device 0000:00:02.0
> 
> > -       igt_debug("recovering the device\n");
> > +       local_debug("recovering the device");
> >         bus_rescan(priv.fd.sysfs_bus);
> 
> [   97.384479] bus: 'pci': add device 0000:00:02.0
> 
> >  
> >         healthcheck();
> > @@ -218,13 +224,13 @@ static void hotunbind_lateclose(void)
> >  
> >         prepare(&priv, buf, sizeof(buf));
> >  
> > -       igt_debug("hot unbinding the driver from the device\n");
> > +       local_debug("hot unbinding the driver from the device");
> >         driver_unbind(priv.fd.sysfs_drv, priv.dev_bus_addr);
> >  
> > -       igt_debug("rebinding the driver to the device\n");
> > +       local_debug("rebinding the driver to the device");
> >         driver_bind(priv.fd.sysfs_drv, priv.dev_bus_addr);
> >  
> > -       igt_debug("late closing the unbound device instance\n");
> > +       local_debug("late closing the unbound device instance");
> >         close(priv.fd.drm);
> 
> Would it be possible to add extra logging allowing us to distinguish this from
> regular unbind on i915 side?
> 
> >  
> >         healthcheck();
> > @@ -236,13 +242,13 @@ static void hotunplug_lateclose(void)
> >  
> >         prepare(&priv, NULL, 0);
> >  
> > -       igt_debug("hot unplugging the device\n");
> > +       local_debug("hot unplugging the device");
> >         device_unplug(priv.fd.sysfs_dev);
> >  
> > -       igt_debug("recovering the device\n");
> > +       local_debug("recovering the device");
> >         bus_rescan(priv.fd.sysfs_bus);
> >  
> > -       igt_debug("late closing the removed device instance\n");
> > +       local_debug("late closing the removed device instance");
> >         close(priv.fd.drm);
> 
> Same thing here.
> 
> So, not including the hot unplug/unbind, I think the logging is already there.
> 
> Also - note, the "driver core" logs are probably disabled on CI, but I still
> think that figuring out how to enable those from IGT (and letting the kernel
> just do its regular logging) rather than adding kmsg prints from userspace is a
> better approach.
> 
> -Michał
> 
> >  
> >         healthcheck();
> > -- 
> > 2.21.1
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Arkadiusz Hiler July 8, 2020, 12:49 p.m. UTC | #3
On Fri, Jun 26, 2020 at 12:18:00PM +0200, Janusz Krzysztofik wrote:
> Hi Michał,
> 
> Thanks for review.
> 
> On Thu, 2020-06-25 at 17:27 +0200, Michał Winiarski wrote:
> > Quoting Janusz Krzysztofik (2020-06-22 18:44:08)
> > > The purpose of debug messages displayed by the test is to make
> > > identification of a subtest phase that fails more easy.  Since issues
> > > exhibited by the test are mostly reported to dmesg, print those debug
> > > messages to /dev/kmsg as well.
> > 
> > I'm not a fan of spamming dmesg from IGT and I'd prefer if you add this logging
> > to the kernel, 
> The idea was to simply log IGT actions, not to log kernel reactions on
> them which already happens.  Doing that from the kernel would probably
> require modifications to PCI sysfs handling or to sysfs in general.
> 
> If you see no benefits from that, let's drop this patch.

We (me & Petri) were thinking about doing something similar, but only
for the places where kernel logs are hard to correlate with the test
actions, to have those "synchronization points" between logs for key
operations.

The main reason was Chamelium - external board that simulates display
and can cause hotplug events to happen. Logging Chamelium operations in
dmesg would make any kind of bug assessment or troubleshooting much
easier - was that a phantom hotplug? or something that was triggered by
us? Have we started doing anything else before the link has settled?
What happened during DP FSM handling?

This is a very special case as we deal with an external board that
drives our HW through wires and layers of firmware and for sure there be
dragons.


Anyway, I would like to see us having a way of logging those "sync
points" into kmesg in igt_core, e.g. by creating _kmesg suffixed version
of log functions.

But I also see Michał's point - too frivolous logging just creates
noise, and we shouldn't double log - if something is already easy to
find in the kernel logs and the correlating test action to logs is not
too hard why should we add more noise?

As of the examples in this thread - I am not very familiar with the area
and I leave it up to you two to decide what would be helpful, what would
be unnecessary repetition and what would be better off logged in the
kernel.

TL;DR: Yes for logging things into kmesg, but we should be careful about
       what we log to not make the situation any worse.

Patch
diff mbox series

diff --git a/tests/core_hotunplug.c b/tests/core_hotunplug.c
index e03f3b945..826645b1f 100644
--- a/tests/core_hotunplug.c
+++ b/tests/core_hotunplug.c
@@ -49,6 +49,12 @@  struct hotunplug {
 
 /* Helpers */
 
+#define local_debug(msg...)						     \
+({									     \
+	igt_debug("%s: %s\n", __func__, msg);				     \
+	igt_kmsg(KMSG_DEBUG "%s: %s: %s\n", igt_test_name(), __func__, msg); \
+})
+
 static void prepare_for_unbind(struct hotunplug *priv, char *buf, int buflen)
 {
 	int len;
@@ -68,9 +74,9 @@  static void prepare_for_unbind(struct hotunplug *priv, char *buf, int buflen)
 	close(priv->fd.sysfs_dev);
 }
 
-static void prepare(struct hotunplug *priv, char *buf, int buflen)
+static inline void prepare(struct hotunplug *priv, char *buf, int buflen)
 {
-	igt_debug("opening device\n");
+	local_debug("opening device");
 	priv->fd.drm = __drm_open_driver(DRIVER_ANY);
 	igt_assert(priv->fd.drm >= 0);
 
@@ -137,14 +143,14 @@  static void bus_rescan(int fd_sysfs_bus)
 	close(fd_sysfs_bus);
 }
 
-static void healthcheck(void)
+static inline void healthcheck(void)
 {
 	int fd_drm;
 
 	/* device name may have changed, rebuild IGT device list */
 	igt_devices_scan(true);
 
-	igt_debug("reopening the device\n");
+	local_debug("reopening the device");
 	fd_drm = __drm_open_driver(DRIVER_ANY);
 	igt_abort_on_f(fd_drm < 0, "Device reopen failure");
 
@@ -181,13 +187,13 @@  static void unbind_rebind(void)
 
 	prepare(&priv, buf, sizeof(buf));
 
-	igt_debug("closing the device\n");
+	local_debug("closing the device");
 	close(priv.fd.drm);
 
-	igt_debug("unbinding the driver from the device\n");
+	local_debug("unbinding the driver from the device");
 	driver_unbind(priv.fd.sysfs_drv, priv.dev_bus_addr);
 
-	igt_debug("rebinding the driver to the device\n");
+	local_debug("rebinding the driver to the device");
 	driver_bind(priv.fd.sysfs_drv, priv.dev_bus_addr);
 
 	healthcheck();
@@ -199,13 +205,13 @@  static void unplug_rescan(void)
 
 	prepare(&priv, NULL, 0);
 
-	igt_debug("closing the device\n");
+	local_debug("closing the device");
 	close(priv.fd.drm);
 
-	igt_debug("unplugging the device\n");
+	local_debug("unplugging the device");
 	device_unplug(priv.fd.sysfs_dev);
 
-	igt_debug("recovering the device\n");
+	local_debug("recovering the device");
 	bus_rescan(priv.fd.sysfs_bus);
 
 	healthcheck();
@@ -218,13 +224,13 @@  static void hotunbind_lateclose(void)
 
 	prepare(&priv, buf, sizeof(buf));
 
-	igt_debug("hot unbinding the driver from the device\n");
+	local_debug("hot unbinding the driver from the device");
 	driver_unbind(priv.fd.sysfs_drv, priv.dev_bus_addr);
 
-	igt_debug("rebinding the driver to the device\n");
+	local_debug("rebinding the driver to the device");
 	driver_bind(priv.fd.sysfs_drv, priv.dev_bus_addr);
 
-	igt_debug("late closing the unbound device instance\n");
+	local_debug("late closing the unbound device instance");
 	close(priv.fd.drm);
 
 	healthcheck();
@@ -236,13 +242,13 @@  static void hotunplug_lateclose(void)
 
 	prepare(&priv, NULL, 0);
 
-	igt_debug("hot unplugging the device\n");
+	local_debug("hot unplugging the device");
 	device_unplug(priv.fd.sysfs_dev);
 
-	igt_debug("recovering the device\n");
+	local_debug("recovering the device");
 	bus_rescan(priv.fd.sysfs_bus);
 
-	igt_debug("late closing the removed device instance\n");
+	local_debug("late closing the removed device instance");
 	close(priv.fd.drm);
 
 	healthcheck();