diff mbox series

PCI: acpiphp: Log more slot and notification details

Message ID 20230808192713.329414-1-helgaas@kernel.org (mailing list archive)
State Changes Requested
Delegated to: Bjorn Helgaas
Headers show
Series PCI: acpiphp: Log more slot and notification details | expand

Commit Message

Bjorn Helgaas Aug. 8, 2023, 7:27 p.m. UTC
From: Bjorn Helgaas <bhelgaas@google.com>

When registering an acpiphp slot, log the slot name in the same style as
pciehp and include the PCI bus/device and whether a device is present or
the slot is empty.

When handling an ACPI notification, log the PCI bus/device and notification
type.

Sample dmesg log diff:

    ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
  - acpiphp: Slot [3] registered
  - acpiphp: Slot [4] registered
    PCI host bridge to bus 0000:00
    pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
    <ACPI Device Check notification>
    pci 0000:00:04.0: [8086:100e] type 00 class 0x020000

    ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
  + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
  + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
    PCI host bridge to bus 0000:00
    pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
    <ACPI Device Check notification>
  + acpiphp: pci 0000:00:04 Slot(4) Device Check
    pci 0000:00:04.0: [8086:100e] type 00 class 0x020000

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/hotplug/acpiphp_core.c |  4 ----
 drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++--
 2 files changed, 21 insertions(+), 6 deletions(-)

Comments

Rafael J. Wysocki Aug. 8, 2023, 7:39 p.m. UTC | #1
On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> From: Bjorn Helgaas <bhelgaas@google.com>
>
> When registering an acpiphp slot, log the slot name in the same style as
> pciehp and include the PCI bus/device and whether a device is present or
> the slot is empty.
>
> When handling an ACPI notification, log the PCI bus/device and notification
> type.
>
> Sample dmesg log diff:
>
>     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
>   - acpiphp: Slot [3] registered
>   - acpiphp: Slot [4] registered
>     PCI host bridge to bus 0000:00
>     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
>     <ACPI Device Check notification>
>     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
>
>     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
>   + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
>   + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
>     PCI host bridge to bus 0000:00
>     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
>     <ACPI Device Check notification>
>   + acpiphp: pci 0000:00:04 Slot(4) Device Check
>     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
>
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
>  drivers/pci/hotplug/acpiphp_core.c |  4 ----
>  drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++--
>  2 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
> index c02257f4b61c..19d47607d009 100644
> --- a/drivers/pci/hotplug/acpiphp_core.c
> +++ b/drivers/pci/hotplug/acpiphp_core.c
> @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
>                 goto error_slot;
>         }
>
> -       pr_info("Slot [%s] registered\n", slot_name(slot));
> -
>         return 0;
>  error_slot:
>         kfree(slot);
> @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
>  {
>         struct slot *slot = acpiphp_slot->slot;
>
> -       pr_info("Slot [%s] unregistered\n", slot_name(slot));
> -
>         pci_hp_deregister(&slot->hotplug_slot);
>         kfree(slot);
>  }
> diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
> index 328d1e416014..eeca2753a5c7 100644
> --- a/drivers/pci/hotplug/acpiphp_glue.c
> +++ b/drivers/pci/hotplug/acpiphp_glue.c
> @@ -25,7 +25,7 @@
>   *    bus. It loses the refcount when the driver unloads.
>   */
>
> -#define pr_fmt(fmt) "acpiphp_glue: " fmt
> +#define pr_fmt(fmt) "acpiphp: " fmt
>
>  #include <linux/module.h>
>
> @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
>                                        &val, 60*1000))
>                 slot->flags |= SLOT_ENABLED;
>
> +       if (slot->slot)
> +               pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n",
> +                       pci_domain_nr(slot->bus), slot->bus->number,
> +                       slot->device, slot_name(slot->slot),
> +                       slot->flags & SLOT_ENABLED ? "enabled" : "empty");
> +
>         return AE_OK;
>  }
>
> @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
>                         acpi_unlock_hp_context();
>                 }
>                 slot->flags |= SLOT_IS_GOING_AWAY;
> -               if (slot->slot)
> +               if (slot->slot) {
> +                       pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n",
> +                               pci_domain_nr(slot->bus), slot->bus->number,
> +                               slot->device, slot_name(slot->slot));
> +
>                         acpiphp_unregister_hotplug_slot(slot);
> +               }
>         }
>
>         mutex_lock(&bridge_mutex);
> @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
>
>         pci_lock_rescan_remove();
>
> +       pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> +               pci_domain_nr(slot->bus), slot->bus->number,
> +               slot->device, slot_name(slot->slot),
> +               type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> +               type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> +               type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> +               "Notification");
> +

pr_debug() perhaps?

On systems that don't have any hotplug problems these messages will
just be filling the kernel log unnecessarily.

>         switch (type) {
>         case ACPI_NOTIFY_BUS_CHECK:
>                 /* bus re-enumerate */
> --
Bjorn Helgaas Aug. 8, 2023, 8:58 p.m. UTC | #2
On Tue, Aug 08, 2023 at 09:39:22PM +0200, Rafael J. Wysocki wrote:
> On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
> > From: Bjorn Helgaas <bhelgaas@google.com>
> >
> > When registering an acpiphp slot, log the slot name in the same style as
> > pciehp and include the PCI bus/device and whether a device is present or
> > the slot is empty.
> >
> > When handling an ACPI notification, log the PCI bus/device and notification
> > type.
> >
> > Sample dmesg log diff:
> >
> >     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> >   - acpiphp: Slot [3] registered
> >   - acpiphp: Slot [4] registered
> >     PCI host bridge to bus 0000:00
> >     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> >     <ACPI Device Check notification>
> >     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> >
> >     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> >   + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
> >   + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
> >     PCI host bridge to bus 0000:00
> >     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> >     <ACPI Device Check notification>
> >   + acpiphp: pci 0000:00:04 Slot(4) Device Check
> >     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> > ...

> > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
> >
> >         pci_lock_rescan_remove();
> >
> > +       pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> > +               pci_domain_nr(slot->bus), slot->bus->number,
> > +               slot->device, slot_name(slot->slot),
> > +               type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> > +               type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> > +               type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> > +               "Notification");
> 
> pr_debug() perhaps?
> 
> On systems that don't have any hotplug problems these messages will
> just be filling the kernel log unnecessarily.

If these notifications are really common, pr_debug() sounds like the
right thing.  I assumed that they would not be common, e.g., they
would happen for user-time things like dock/undock, plug/unplug,
suspend/resume, etc.

In pciehp, we use _info for attention button presses, presence detect
changes, link up/down, and I assumed the ACPI notify events would
roughly correspond to those.  No?

Bjorn
Rafael J. Wysocki Aug. 8, 2023, 9:31 p.m. UTC | #3
On Tue, Aug 8, 2023 at 10:58 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> On Tue, Aug 08, 2023 at 09:39:22PM +0200, Rafael J. Wysocki wrote:
> > On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
> > > From: Bjorn Helgaas <bhelgaas@google.com>
> > >
> > > When registering an acpiphp slot, log the slot name in the same style as
> > > pciehp and include the PCI bus/device and whether a device is present or
> > > the slot is empty.
> > >
> > > When handling an ACPI notification, log the PCI bus/device and notification
> > > type.
> > >
> > > Sample dmesg log diff:
> > >
> > >     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> > >   - acpiphp: Slot [3] registered
> > >   - acpiphp: Slot [4] registered
> > >     PCI host bridge to bus 0000:00
> > >     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> > >     <ACPI Device Check notification>
> > >     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> > >
> > >     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> > >   + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
> > >   + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
> > >     PCI host bridge to bus 0000:00
> > >     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> > >     <ACPI Device Check notification>
> > >   + acpiphp: pci 0000:00:04 Slot(4) Device Check
> > >     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> > > ...
>
> > > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
> > >
> > >         pci_lock_rescan_remove();
> > >
> > > +       pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> > > +               pci_domain_nr(slot->bus), slot->bus->number,
> > > +               slot->device, slot_name(slot->slot),
> > > +               type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> > > +               type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> > > +               type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> > > +               "Notification");
> >
> > pr_debug() perhaps?
> >
> > On systems that don't have any hotplug problems these messages will
> > just be filling the kernel log unnecessarily.
>
> If these notifications are really common, pr_debug() sounds like the
> right thing.  I assumed that they would not be common, e.g., they
> would happen for user-time things like dock/undock, plug/unplug,
> suspend/resume, etc.
>
> In pciehp, we use _info for attention button presses, presence detect
> changes, link up/down, and I assumed the ACPI notify events would
> roughly correspond to those.  No?

Depending on how often the system gets suspended and resumed, they may
end up in the log quite often and if there are no problems related to
them, they are just noise.

IMHO in that case the users are taught to ignore stuff that lands in
the log which is not fantastic.
Igor Mammedov Aug. 9, 2023, 9:21 a.m. UTC | #4
On Tue,  8 Aug 2023 14:27:13 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> From: Bjorn Helgaas <bhelgaas@google.com>
> 
> When registering an acpiphp slot, log the slot name in the same style as
> pciehp and include the PCI bus/device and whether a device is present or
> the slot is empty.
> 
> When handling an ACPI notification, log the PCI bus/device and notification
> type.
> 
> Sample dmesg log diff:
> 
>     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
>   - acpiphp: Slot [3] registered
>   - acpiphp: Slot [4] registered
>     PCI host bridge to bus 0000:00
>     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
>     <ACPI Device Check notification>

Having ACPI node name/path here that received notification would be helpfull  

>     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> 
>     ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
>   + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
>   + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
>     PCI host bridge to bus 0000:00
>     pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
>     <ACPI Device Check notification>
>   + acpiphp: pci 0000:00:04 Slot(4) Device Check
>     pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> 
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
>  drivers/pci/hotplug/acpiphp_core.c |  4 ----
>  drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++--
>  2 files changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
> index c02257f4b61c..19d47607d009 100644
> --- a/drivers/pci/hotplug/acpiphp_core.c
> +++ b/drivers/pci/hotplug/acpiphp_core.c
> @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
>  		goto error_slot;
>  	}
>  
> -	pr_info("Slot [%s] registered\n", slot_name(slot));
> -
>  	return 0;
>  error_slot:
>  	kfree(slot);
> @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
>  {
>  	struct slot *slot = acpiphp_slot->slot;
>  
> -	pr_info("Slot [%s] unregistered\n", slot_name(slot));
> -
>  	pci_hp_deregister(&slot->hotplug_slot);
>  	kfree(slot);
>  }
> diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
> index 328d1e416014..eeca2753a5c7 100644
> --- a/drivers/pci/hotplug/acpiphp_glue.c
> +++ b/drivers/pci/hotplug/acpiphp_glue.c
> @@ -25,7 +25,7 @@
>   *    bus. It loses the refcount when the driver unloads.
>   */
>  
> -#define pr_fmt(fmt) "acpiphp_glue: " fmt
> +#define pr_fmt(fmt) "acpiphp: " fmt
>  
>  #include <linux/module.h>
>  
> @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
>  				       &val, 60*1000))
>  		slot->flags |= SLOT_ENABLED;
>  
> +	if (slot->slot)
> +		pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n",
> +			pci_domain_nr(slot->bus), slot->bus->number,
> +			slot->device, slot_name(slot->slot),
> +			slot->flags & SLOT_ENABLED ? "enabled" : "empty");
> +
>  	return AE_OK;
>  }
>  
> @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
>  			acpi_unlock_hp_context();
>  		}
>  		slot->flags |= SLOT_IS_GOING_AWAY;
> -		if (slot->slot)
> +		if (slot->slot) {
> +			pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n",
> +				pci_domain_nr(slot->bus), slot->bus->number,
> +				slot->device, slot_name(slot->slot));
> +
>  			acpiphp_unregister_hotplug_slot(slot);
> +		}
>  	}
>  
>  	mutex_lock(&bridge_mutex);
> @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
>  
>  	pci_lock_rescan_remove();
>  
> +	pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> +		pci_domain_nr(slot->bus), slot->bus->number,
> +		slot->device, slot_name(slot->slot),
I had similar issue with logging patches that I've asked Woody to run.

it crashes here with buscheck on non existing device

Call Trace:
 <TASK>
 ? __die_body+0x19/0x60
 ? page_fault_oops+0x158/0x430
 ? fixup_exception+0x21/0x330
 ? exc_page_fault+0x6b/0x150
 ? asm_exc_page_fault+0x26/0x30
 ? acpiphp_hotplug_notify+0x13d/0x2f0
 ? __pfx_acpiphp_hotplug_notify+0x10/0x10
 acpi_device_hotplug+0xc2/0x4f0
 acpi_hotplug_work_fn+0x19/0x30
 process_one_work+0x1f8/0x3e0
 worker_thread+0x29/0x3b0
 ? __pfx_worker_thread+0x10/0x10
 kthread+0xf2/0x120
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x2f/0x40
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0

(gdb) p *slot
$2 = {node = {next = 0xffff8881003287c0, prev = 0xffff888100887730}, bus = 0xffff8881003de800,
 funcs = {next = 0xffff8881008877b0, prev = 0xffff888100887a50}, slot = 0x0 <fixed_percpu_data>,
                                                                 ^^^ likely culprit
 device = 0x0, flags = 0x1}


reproducer hack: https://gitlab.com/imammedo/qemu/-/tree/acpiphp_buscheck_on_missing_device?ref_type=heads

 ./qemu-system-x86_64 -M q35 -cpu host -smp 4 -enable-kvm  -m 6G  -kernel ~/builds/linux-2.6/arch/x86/boot/bzImage -append 'nokaslr root=/dev/sda1 console=ttyS0' -snapshot rhel9.img  -device pcie-root-port,id=rp1,bus=pcie.0,chassis=0,addr=8 -monitor stdio -serial file:/tmp/s -s

then once booted to trigger buscheck issue at monitor prompt:
(qemu) device_add e1000e,bus=rp1

> +		type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> +		type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> +		type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> +		"Notification");
> +
>  	switch (type) {
>  	case ACPI_NOTIFY_BUS_CHECK:
>  		/* bus re-enumerate */
diff mbox series

Patch

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index c02257f4b61c..19d47607d009 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -282,8 +282,6 @@  int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 		goto error_slot;
 	}
 
-	pr_info("Slot [%s] registered\n", slot_name(slot));
-
 	return 0;
 error_slot:
 	kfree(slot);
@@ -296,8 +294,6 @@  void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
 {
 	struct slot *slot = acpiphp_slot->slot;
 
-	pr_info("Slot [%s] unregistered\n", slot_name(slot));
-
 	pci_hp_deregister(&slot->hotplug_slot);
 	kfree(slot);
 }
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 328d1e416014..eeca2753a5c7 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -25,7 +25,7 @@ 
  *    bus. It loses the refcount when the driver unloads.
  */
 
-#define pr_fmt(fmt) "acpiphp_glue: " fmt
+#define pr_fmt(fmt) "acpiphp: " fmt
 
 #include <linux/module.h>
 
@@ -333,6 +333,12 @@  static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
 				       &val, 60*1000))
 		slot->flags |= SLOT_ENABLED;
 
+	if (slot->slot)
+		pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n",
+			pci_domain_nr(slot->bus), slot->bus->number,
+			slot->device, slot_name(slot->slot),
+			slot->flags & SLOT_ENABLED ? "enabled" : "empty");
+
 	return AE_OK;
 }
 
@@ -351,8 +357,13 @@  static void cleanup_bridge(struct acpiphp_bridge *bridge)
 			acpi_unlock_hp_context();
 		}
 		slot->flags |= SLOT_IS_GOING_AWAY;
-		if (slot->slot)
+		if (slot->slot) {
+			pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n",
+				pci_domain_nr(slot->bus), slot->bus->number,
+				slot->device, slot_name(slot->slot));
+
 			acpiphp_unregister_hotplug_slot(slot);
+		}
 	}
 
 	mutex_lock(&bridge_mutex);
@@ -793,6 +804,14 @@  static void hotplug_event(u32 type, struct acpiphp_context *context)
 
 	pci_lock_rescan_remove();
 
+	pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
+		pci_domain_nr(slot->bus), slot->bus->number,
+		slot->device, slot_name(slot->slot),
+		type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
+		type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
+		type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
+		"Notification");
+
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
 		/* bus re-enumerate */