diff mbox series

[iwl-net,v2] igc: Fix deadlock on module removal

Message ID 20240411-igc_led_deadlock-v2-1-b758c0c88b2b@linutronix.de (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series [iwl-net,v2] igc: Fix deadlock on module removal | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 926 this patch: 926
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 939 this patch: 939
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 937 this patch: 937
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 85 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 29 this patch: 29
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-04-15--15-00 (tests: 961)

Commit Message

Kurt Kanzenbach April 15, 2024, 10:59 a.m. UTC
From: Lukas Wunner <lukas@wunner.de>

The removal of the igc module leads to a deadlock:

|[Mon Apr  8 17:38:55 2024]  __mutex_lock.constprop.0+0x3e5/0x7a0
|[Mon Apr  8 17:38:55 2024]  ? preempt_count_add+0x85/0xd0
|[Mon Apr  8 17:38:55 2024]  __mutex_lock_slowpath+0x13/0x20
|[Mon Apr  8 17:38:55 2024]  mutex_lock+0x3b/0x50
|[Mon Apr  8 17:38:55 2024]  rtnl_lock+0x19/0x20
|[Mon Apr  8 17:38:55 2024]  unregister_netdevice_notifier+0x2a/0xc0
|[Mon Apr  8 17:38:55 2024]  netdev_trig_deactivate+0x25/0x70
|[Mon Apr  8 17:38:55 2024]  led_trigger_set+0xe2/0x2d0
|[Mon Apr  8 17:38:55 2024]  led_classdev_unregister+0x4f/0x100
|[Mon Apr  8 17:38:55 2024]  devm_led_classdev_release+0x15/0x20
|[Mon Apr  8 17:38:55 2024]  release_nodes+0x47/0xc0
|[Mon Apr  8 17:38:55 2024]  devres_release_all+0x9f/0xe0
|[Mon Apr  8 17:38:55 2024]  device_del+0x272/0x3c0
|[Mon Apr  8 17:38:55 2024]  netdev_unregister_kobject+0x8c/0xa0
|[Mon Apr  8 17:38:55 2024]  unregister_netdevice_many_notify+0x530/0x7c0
|[Mon Apr  8 17:38:55 2024]  unregister_netdevice_queue+0xad/0xf0
|[Mon Apr  8 17:38:55 2024]  unregister_netdev+0x21/0x30
|[Mon Apr  8 17:38:55 2024]  igc_remove+0xfb/0x1f0 [igc]
|[Mon Apr  8 17:38:55 2024]  pci_device_remove+0x42/0xb0
|[Mon Apr  8 17:38:55 2024]  device_remove+0x43/0x70

unregister_netdev() acquires the RNTL lock and releases the LEDs bound
to that netdevice. However, netdev_trig_deactivate() and later
unregister_netdevice_notifier() try to acquire the RTNL lock again.

Avoid this situation by not using the device-managed LED class
functions.

Link: https://lore.kernel.org/r/CAEhC_B=ksywxCG_+aQqXUrGEgKq+4mqnSV8EBHOKbC3-Obj9+Q@mail.gmail.com/
Link: https://lore.kernel.org/r/ZhRD3cOtz5i-61PB@mail-itl/
Reported-by: Roman Lozko <lozko.roma@gmail.com>
Reported-by: "Marek Marczykowski-Górecki" <marmarek@invisiblethingslab.com>
Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226")
Signed-off-by: Lukas Wunner <lukas@wunner.de>
[Kurt: Wrote commit message and tested on i225]
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
---
Changes in v2:
- Add Lukas' SoB (Lukas)
- Add Reported-by (Lukas)
- Add links to discussions
- Take care of error path (Lukas)
- Remove forward declaration (Lukas)
- Link to v1: https://lore.kernel.org/r/20240411-igc_led_deadlock-v1-1-0da98a3c68c5@linutronix.de
---
 drivers/net/ethernet/intel/igc/igc.h      |  2 ++
 drivers/net/ethernet/intel/igc/igc_leds.c | 38 ++++++++++++++++++++++++-------
 drivers/net/ethernet/intel/igc/igc_main.c |  3 +++
 3 files changed, 35 insertions(+), 8 deletions(-)


---
base-commit: 7efd0a74039fb6b584be2cb91c1d0ef0bd796ee1
change-id: 20240411-igc_led_deadlock-7abd85954f5e

Best regards,

Comments

Simon Horman April 16, 2024, 1:51 p.m. UTC | #1
On Mon, Apr 15, 2024 at 12:59:37PM +0200, Kurt Kanzenbach wrote:
> From: Lukas Wunner <lukas@wunner.de>
> 
> The removal of the igc module leads to a deadlock:
> 
> |[Mon Apr  8 17:38:55 2024]  __mutex_lock.constprop.0+0x3e5/0x7a0
> |[Mon Apr  8 17:38:55 2024]  ? preempt_count_add+0x85/0xd0
> |[Mon Apr  8 17:38:55 2024]  __mutex_lock_slowpath+0x13/0x20
> |[Mon Apr  8 17:38:55 2024]  mutex_lock+0x3b/0x50
> |[Mon Apr  8 17:38:55 2024]  rtnl_lock+0x19/0x20
> |[Mon Apr  8 17:38:55 2024]  unregister_netdevice_notifier+0x2a/0xc0
> |[Mon Apr  8 17:38:55 2024]  netdev_trig_deactivate+0x25/0x70
> |[Mon Apr  8 17:38:55 2024]  led_trigger_set+0xe2/0x2d0
> |[Mon Apr  8 17:38:55 2024]  led_classdev_unregister+0x4f/0x100
> |[Mon Apr  8 17:38:55 2024]  devm_led_classdev_release+0x15/0x20
> |[Mon Apr  8 17:38:55 2024]  release_nodes+0x47/0xc0
> |[Mon Apr  8 17:38:55 2024]  devres_release_all+0x9f/0xe0
> |[Mon Apr  8 17:38:55 2024]  device_del+0x272/0x3c0
> |[Mon Apr  8 17:38:55 2024]  netdev_unregister_kobject+0x8c/0xa0
> |[Mon Apr  8 17:38:55 2024]  unregister_netdevice_many_notify+0x530/0x7c0
> |[Mon Apr  8 17:38:55 2024]  unregister_netdevice_queue+0xad/0xf0
> |[Mon Apr  8 17:38:55 2024]  unregister_netdev+0x21/0x30
> |[Mon Apr  8 17:38:55 2024]  igc_remove+0xfb/0x1f0 [igc]
> |[Mon Apr  8 17:38:55 2024]  pci_device_remove+0x42/0xb0
> |[Mon Apr  8 17:38:55 2024]  device_remove+0x43/0x70
> 
> unregister_netdev() acquires the RNTL lock and releases the LEDs bound
> to that netdevice. However, netdev_trig_deactivate() and later
> unregister_netdevice_notifier() try to acquire the RTNL lock again.
> 
> Avoid this situation by not using the device-managed LED class
> functions.
> 
> Link: https://lore.kernel.org/r/CAEhC_B=ksywxCG_+aQqXUrGEgKq+4mqnSV8EBHOKbC3-Obj9+Q@mail.gmail.com/
> Link: https://lore.kernel.org/r/ZhRD3cOtz5i-61PB@mail-itl/
> Reported-by: Roman Lozko <lozko.roma@gmail.com>
> Reported-by: "Marek Marczykowski-Górecki" <marmarek@invisiblethingslab.com>
> Fixes: ea578703b03d ("igc: Add support for LEDs on i225/i226")
> Signed-off-by: Lukas Wunner <lukas@wunner.de>
> [Kurt: Wrote commit message and tested on i225]
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>

I am aware that this patch seems to have also been submitted by Lucas
himself. I'd like to suggest that we focus on review of that submission.

https://lore.kernel.org/netdev/2f1be6b1cf2b3346929b0049f2ac7d7d79acb5c9.1713188539.git.lukas@wunner.de/
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 90316dc58630..6bc56c7c181e 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -298,6 +298,7 @@  struct igc_adapter {
 
 	/* LEDs */
 	struct mutex led_mutex;
+	struct igc_led_classdev *leds;
 };
 
 void igc_up(struct igc_adapter *adapter);
@@ -723,6 +724,7 @@  void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
 void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
 
 int igc_led_setup(struct igc_adapter *adapter);
+void igc_led_free(struct igc_adapter *adapter);
 
 #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
 
diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c
index bf240c5daf86..3929b25b6ae6 100644
--- a/drivers/net/ethernet/intel/igc/igc_leds.c
+++ b/drivers/net/ethernet/intel/igc/igc_leds.c
@@ -236,8 +236,8 @@  static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf,
 		 pci_dev_id(adapter->pdev), index);
 }
 
-static void igc_setup_ldev(struct igc_led_classdev *ldev,
-			   struct net_device *netdev, int index)
+static int igc_setup_ldev(struct igc_led_classdev *ldev,
+			  struct net_device *netdev, int index)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct led_classdev *led_cdev = &ldev->led;
@@ -257,24 +257,46 @@  static void igc_setup_ldev(struct igc_led_classdev *ldev,
 	led_cdev->hw_control_get = igc_led_hw_control_get;
 	led_cdev->hw_control_get_device = igc_led_hw_control_get_device;
 
-	devm_led_classdev_register(&netdev->dev, led_cdev);
+	return led_classdev_register(&netdev->dev, led_cdev);
 }
 
 int igc_led_setup(struct igc_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct device *dev = &netdev->dev;
 	struct igc_led_classdev *leds;
-	int i;
+	int i, err;
 
 	mutex_init(&adapter->led_mutex);
 
-	leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+	leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
 	if (!leds)
 		return -ENOMEM;
 
-	for (i = 0; i < IGC_NUM_LEDS; i++)
-		igc_setup_ldev(leds + i, netdev, i);
+	for (i = 0; i < IGC_NUM_LEDS; i++) {
+		err = igc_setup_ldev(leds + i, netdev, i);
+		if (err)
+			goto err;
+	}
+
+	adapter->leds = leds;
 
 	return 0;
+
+err:
+	for (i--; i >= 0; i--)
+		led_classdev_unregister(&((leds + i)->led));
+
+	kfree(leds);
+	return err;
+}
+
+void igc_led_free(struct igc_adapter *adapter)
+{
+	struct igc_led_classdev *leds = adapter->leds;
+	int i;
+
+	for (i = 0; i < IGC_NUM_LEDS; i++)
+		led_classdev_unregister(&((leds + i)->led));
+
+	kfree(leds);
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 35ad40a803cb..4d975d620a8e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7021,6 +7021,9 @@  static void igc_remove(struct pci_dev *pdev)
 	cancel_work_sync(&adapter->watchdog_task);
 	hrtimer_cancel(&adapter->hrtimer);
 
+	if (IS_ENABLED(CONFIG_IGC_LEDS))
+		igc_led_free(adapter);
+
 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
 	 * would have already happened in close and is redundant.
 	 */