diff mbox series

[1/3] thermal: intel: int340x: Fix kernel warning during MSI cleanup

Message ID 20240723140228.865919-2-srinivas.pandruvada@linux.intel.com (mailing list archive)
State Mainlined, archived
Headers show
Series thermal: intel: int340x: Fix Lunar Lake MSI support | expand

Commit Message

srinivas pandruvada July 23, 2024, 2:02 p.m. UTC
On some pre-production Lunar Lake systems, there is a kernel
warning:

remove_proc_entry: removing non-empty directory 'irq/172'
WARNING: CPU: 0 PID: 501 at fs/proc/generic.c:717
	remove_proc_entry+0x1b4/0x1e0
...
...
remove_proc_entry+0x1b4/0x1e0
report_bug+0x182/0x1b0
handle_bug+0x51/0xa0
exc_invalid_op+0x18/0x80
asm_exc_invalid_op+0x1b/0x20
remove_proc_entry+0x1b4/0x1e0
remove_proc_entry+0x1b4/0x1e0
unregister_irq_proc+0xf2/0x120
free_desc+0x41/0xe0
irq_domain_free_irqs+0x138/0x1c0
irq_free_descs+0x52/0x80
irq_domain_free_irqs+0x151/0x1c0
msi_domain_free_locked.part.0+0x17e/0x1c0
msi_domain_free_irqs_all_locked+0x74/0xc0
pci_msi_teardown_msi_irqs+0x50/0x60
pci_free_msi_irqs+0x12/0x40
pci_free_irq_vectors+0x58/0x70

On these systems, not all the MSI thermal vectors are valid. This causes
devm_request_threaded_irq() to fail for some vectors. As part of the
clean up on this error, pci_free_irq_vectors() is called without calling
devm_free_irq(). This causes the above warning.

Add a function proc_thermal_free_msi() to call devm_free_irq() for all
successfully registered IRQ handlers, then call pci_free_irq_vectors().
Call this function for MSI cleanup.

Fixes: 7a9a8c5faf41 ("thermal: intel: int340x: Support MSI interrupt for Lunar Lake")
Reported-by: Yijun Shen <Yijun.shen@dell.com>
Tested-by: Yijun Shen <Yijun.shen@dell.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 .../processor_thermal_device_pci.c               | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Zhang Rui July 30, 2024, 2:24 p.m. UTC | #1
On Tue, 2024-07-23 at 07:02 -0700, Srinivas Pandruvada wrote:
> On some pre-production Lunar Lake systems, there is a kernel
> warning:
> 
> remove_proc_entry: removing non-empty directory 'irq/172'
> WARNING: CPU: 0 PID: 501 at fs/proc/generic.c:717
>         remove_proc_entry+0x1b4/0x1e0
> ...
> ...
> remove_proc_entry+0x1b4/0x1e0
> report_bug+0x182/0x1b0
> handle_bug+0x51/0xa0
> exc_invalid_op+0x18/0x80
> asm_exc_invalid_op+0x1b/0x20
> remove_proc_entry+0x1b4/0x1e0
> remove_proc_entry+0x1b4/0x1e0
> unregister_irq_proc+0xf2/0x120
> free_desc+0x41/0xe0
> irq_domain_free_irqs+0x138/0x1c0
> irq_free_descs+0x52/0x80
> irq_domain_free_irqs+0x151/0x1c0
> msi_domain_free_locked.part.0+0x17e/0x1c0
> msi_domain_free_irqs_all_locked+0x74/0xc0
> pci_msi_teardown_msi_irqs+0x50/0x60
> pci_free_msi_irqs+0x12/0x40
> pci_free_irq_vectors+0x58/0x70
> 
> On these systems, not all the MSI thermal vectors are valid. This causes
> devm_request_threaded_irq() to fail for some vectors. As part of the
> clean up on this error, pci_free_irq_vectors() is called without calling
> devm_free_irq(). This causes the above warning.
> 
> Add a function proc_thermal_free_msi() to call devm_free_irq() for all
> successfully registered IRQ handlers, then call pci_free_irq_vectors().
> Call this function for MSI cleanup.
> 
> Fixes: 7a9a8c5faf41 ("thermal: intel: int340x: Support MSI interrupt for Lunar Lake")
> Reported-by: Yijun Shen <Yijun.shen@dell.com>
> Tested-by: Yijun Shen <Yijun.shen@dell.com>
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

The whole patch set looks good to me.

Reviewed-by: Zhang Rui <rui.zhang@intel.com>

thanks,
rui
> ---
>  .../processor_thermal_device_pci.c               | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
> index 114136893a59..2c9c45eb5b4a 100644
> --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
> +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
> @@ -278,6 +278,18 @@ static struct thermal_zone_params tzone_params = {
>  
>  static bool msi_irq;
>  
> +static void proc_thermal_free_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info)
> +{
> +       int i;
> +
> +       for (i = 0; i < MSI_THERMAL_MAX; i++) {
> +               if (proc_thermal_msi_map[i])
> +                       devm_free_irq(&pdev->dev, proc_thermal_msi_map[i], pci_info);
> +       }
> +
> +       pci_free_irq_vectors(pdev);
> +}
> +
>  static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info)
>  {
>         int ret, i, irq;
> @@ -310,7 +322,7 @@ static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci
>         return 0;
>  
>  err_free_msi_vectors:
> -       pci_free_irq_vectors(pdev);
> +       proc_thermal_free_msi(pdev, pci_info);
>  
>         return ret;
>  }
> @@ -397,7 +409,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
>  
>  err_free_vectors:
>         if (msi_irq)
> -               pci_free_irq_vectors(pdev);
> +               proc_thermal_free_msi(pdev, pci_info);
>  err_ret_tzone:
>         thermal_zone_device_unregister(pci_info->tzone);
>  err_del_legacy:
diff mbox series

Patch

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 114136893a59..2c9c45eb5b4a 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -278,6 +278,18 @@  static struct thermal_zone_params tzone_params = {
 
 static bool msi_irq;
 
+static void proc_thermal_free_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info)
+{
+	int i;
+
+	for (i = 0; i < MSI_THERMAL_MAX; i++) {
+		if (proc_thermal_msi_map[i])
+			devm_free_irq(&pdev->dev, proc_thermal_msi_map[i], pci_info);
+	}
+
+	pci_free_irq_vectors(pdev);
+}
+
 static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info)
 {
 	int ret, i, irq;
@@ -310,7 +322,7 @@  static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci
 	return 0;
 
 err_free_msi_vectors:
-	pci_free_irq_vectors(pdev);
+	proc_thermal_free_msi(pdev, pci_info);
 
 	return ret;
 }
@@ -397,7 +409,7 @@  static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_
 
 err_free_vectors:
 	if (msi_irq)
-		pci_free_irq_vectors(pdev);
+		proc_thermal_free_msi(pdev, pci_info);
 err_ret_tzone:
 	thermal_zone_device_unregister(pci_info->tzone);
 err_del_legacy: