diff mbox series

[v8,2/2,RESEND] mwifiex: fix sleep in atomic context bugs caused by dev_coredumpv

Message ID 5cfa5c473ff6d069cb67760ffa04a2f84ef450a8.1661252818.git.duoming@zju.edu.cn (mailing list archive)
State Accepted
Commit 551e4745c7f218da7070b36a06318592913676ff
Delegated to: Kalle Valo
Headers show
Series Add new APIs of devcoredump and fix bugs | expand

Commit Message

Duoming Zhou Aug. 23, 2022, 11:21 a.m. UTC
There are sleep-in-atomic-context bugs when uploading device dump
data in mwifiex. The root cause is that dev_coredumpv() cannot
be used in atomic context, because it calls dev_set_name(), which
includes operations that may sleep. The call tree below shows the
execution path that leads to the bug:

   (Interrupt context)
fw_dump_timer_fn
  mwifiex_upload_device_dump
    dev_coredumpv(..., GFP_KERNEL)
      dev_coredumpm()
        kzalloc(sizeof(*devcd), gfp); //may sleep
        dev_set_name
          kobject_set_name_vargs
            kvasprintf_const(GFP_KERNEL, ...); //may sleep
            kstrdup(s, GFP_KERNEL); //may sleep

The corresponding fail log is shown below:

[  135.275938] usb 1-1: == mwifiex dump information to /sys/class/devcoredump start
[  135.281029] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:265
...
[  135.293613] Call Trace:
[  135.293613]  <IRQ>
[  135.293613]  dump_stack_lvl+0x57/0x7d
[  135.293613]  __might_resched.cold+0x138/0x173
[  135.293613]  ? dev_coredumpm+0xca/0x2e0
[  135.293613]  kmem_cache_alloc_trace+0x189/0x1f0
[  135.293613]  ? devcd_match_failing+0x30/0x30
[  135.293613]  dev_coredumpm+0xca/0x2e0
[  135.293613]  ? devcd_freev+0x10/0x10
[  135.293613]  dev_coredumpv+0x1c/0x20
[  135.293613]  ? devcd_match_failing+0x30/0x30
[  135.293613]  mwifiex_upload_device_dump+0x65/0xb0
[  135.293613]  ? mwifiex_dnld_fw+0x1b0/0x1b0
[  135.293613]  call_timer_fn+0x122/0x3d0
[  135.293613]  ? msleep_interruptible+0xb0/0xb0
[  135.293613]  ? lock_downgrade+0x3c0/0x3c0
[  135.293613]  ? __next_timer_interrupt+0x13c/0x160
[  135.293613]  ? lockdep_hardirqs_on_prepare+0xe/0x220
[  135.293613]  ? mwifiex_dnld_fw+0x1b0/0x1b0
[  135.293613]  __run_timers.part.0+0x3f8/0x540
[  135.293613]  ? call_timer_fn+0x3d0/0x3d0
[  135.293613]  ? arch_restore_msi_irqs+0x10/0x10
[  135.293613]  ? lapic_next_event+0x31/0x40
[  135.293613]  run_timer_softirq+0x4f/0xb0
[  135.293613]  __do_softirq+0x1c2/0x651
...
[  135.293613] RIP: 0010:default_idle+0xb/0x10
[  135.293613] RSP: 0018:ffff888006317e68 EFLAGS: 00000246
[  135.293613] RAX: ffffffff82ad8d10 RBX: ffff888006301cc0 RCX: ffffffff82ac90e1
[  135.293613] RDX: ffffed100d9ff1b4 RSI: ffffffff831ad140 RDI: ffffffff82ad8f20
[  135.293613] RBP: 0000000000000003 R08: 0000000000000000 R09: ffff88806cff8d9b
[  135.293613] R10: ffffed100d9ff1b3 R11: 0000000000000001 R12: ffffffff84593410
[  135.293613] R13: 0000000000000000 R14: 0000000000000000 R15: 1ffff11000c62fd2
...
[  135.389205] usb 1-1: == mwifiex dump information to /sys/class/devcoredump end

This patch replaces the timer with a delayed work item, which moves
the operations that may sleep into process context in order to avoid
the bugs. It was tested on a Marvell 88W8801 chip with a USB interface
and the usb8801_uapsta.bin firmware. The following is the log after
replacing the timer with delayed work:

[  134.936453] usb 1-1: == mwifiex dump information to /sys/class/devcoredump start
[  135.043344] usb 1-1: == mwifiex dump information to /sys/class/devcoredump end

As we can see, there is no bug now.

Fixes: f5ecd02a8b20 ("mwifiex: device dump support for usb interface")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
Changes since v6:
  - Use clang-format to adjust the format of code.

 drivers/net/wireless/marvell/mwifiex/init.c      | 9 +++++----
 drivers/net/wireless/marvell/mwifiex/main.h      | 3 ++-
 drivers/net/wireless/marvell/mwifiex/sta_event.c | 6 +++---
 3 files changed, 10 insertions(+), 8 deletions(-)

Comments

Kalle Valo Sept. 22, 2022, 6:08 a.m. UTC | #1
Duoming Zhou <duoming@zju.edu.cn> wrote:

> There are sleep-in-atomic-context bugs when uploading device dump
> data in mwifiex. The root cause is that dev_coredumpv() cannot
> be used in atomic context, because it calls dev_set_name(), which
> includes operations that may sleep. The call tree below shows the
> execution path that leads to the bug:
> 
>    (Interrupt context)
> fw_dump_timer_fn
>   mwifiex_upload_device_dump
>     dev_coredumpv(..., GFP_KERNEL)
>       dev_coredumpm()
>         kzalloc(sizeof(*devcd), gfp); //may sleep
>         dev_set_name
>           kobject_set_name_vargs
>             kvasprintf_const(GFP_KERNEL, ...); //may sleep
>             kstrdup(s, GFP_KERNEL); //may sleep
> 
> The corresponding fail log is shown below:
> 
> [  135.275938] usb 1-1: == mwifiex dump information to /sys/class/devcoredump start
> [  135.281029] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:265
> ...
> [  135.293613] Call Trace:
> [  135.293613]  <IRQ>
> [  135.293613]  dump_stack_lvl+0x57/0x7d
> [  135.293613]  __might_resched.cold+0x138/0x173
> [  135.293613]  ? dev_coredumpm+0xca/0x2e0
> [  135.293613]  kmem_cache_alloc_trace+0x189/0x1f0
> [  135.293613]  ? devcd_match_failing+0x30/0x30
> [  135.293613]  dev_coredumpm+0xca/0x2e0
> [  135.293613]  ? devcd_freev+0x10/0x10
> [  135.293613]  dev_coredumpv+0x1c/0x20
> [  135.293613]  ? devcd_match_failing+0x30/0x30
> [  135.293613]  mwifiex_upload_device_dump+0x65/0xb0
> [  135.293613]  ? mwifiex_dnld_fw+0x1b0/0x1b0
> [  135.293613]  call_timer_fn+0x122/0x3d0
> [  135.293613]  ? msleep_interruptible+0xb0/0xb0
> [  135.293613]  ? lock_downgrade+0x3c0/0x3c0
> [  135.293613]  ? __next_timer_interrupt+0x13c/0x160
> [  135.293613]  ? lockdep_hardirqs_on_prepare+0xe/0x220
> [  135.293613]  ? mwifiex_dnld_fw+0x1b0/0x1b0
> [  135.293613]  __run_timers.part.0+0x3f8/0x540
> [  135.293613]  ? call_timer_fn+0x3d0/0x3d0
> [  135.293613]  ? arch_restore_msi_irqs+0x10/0x10
> [  135.293613]  ? lapic_next_event+0x31/0x40
> [  135.293613]  run_timer_softirq+0x4f/0xb0
> [  135.293613]  __do_softirq+0x1c2/0x651
> ...
> [  135.293613] RIP: 0010:default_idle+0xb/0x10
> [  135.293613] RSP: 0018:ffff888006317e68 EFLAGS: 00000246
> [  135.293613] RAX: ffffffff82ad8d10 RBX: ffff888006301cc0 RCX: ffffffff82ac90e1
> [  135.293613] RDX: ffffed100d9ff1b4 RSI: ffffffff831ad140 RDI: ffffffff82ad8f20
> [  135.293613] RBP: 0000000000000003 R08: 0000000000000000 R09: ffff88806cff8d9b
> [  135.293613] R10: ffffed100d9ff1b3 R11: 0000000000000001 R12: ffffffff84593410
> [  135.293613] R13: 0000000000000000 R14: 0000000000000000 R15: 1ffff11000c62fd2
> ...
> [  135.389205] usb 1-1: == mwifiex dump information to /sys/class/devcoredump end
> 
> This patch replaces the timer with a delayed work item, which moves
> the operations that may sleep into process context in order to avoid
> the bugs. It was tested on a Marvell 88W8801 chip with a USB interface
> and the usb8801_uapsta.bin firmware. The following is the log after
> replacing the timer with delayed work:
> 
> [  134.936453] usb 1-1: == mwifiex dump information to /sys/class/devcoredump start
> [  135.043344] usb 1-1: == mwifiex dump information to /sys/class/devcoredump end
> 
> As we can see, there is no bug now.
> 
> Fixes: f5ecd02a8b20 ("mwifiex: device dump support for usb interface")
> Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
> Reviewed-by: Brian Norris <briannorris@chromium.org>
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Patch applied to wireless-next.git, thanks.

551e4745c7f2 mwifiex: fix sleep in atomic context bugs caused by dev_coredumpv
diff mbox series

Patch

diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c
index fc77489cc51..7dddb4b5dea 100644
--- a/drivers/net/wireless/marvell/mwifiex/init.c
+++ b/drivers/net/wireless/marvell/mwifiex/init.c
@@ -51,9 +51,10 @@  static void wakeup_timer_fn(struct timer_list *t)
 		adapter->if_ops.card_reset(adapter);
 }
 
-static void fw_dump_timer_fn(struct timer_list *t)
+static void fw_dump_work(struct work_struct *work)
 {
-	struct mwifiex_adapter *adapter = from_timer(adapter, t, devdump_timer);
+	struct mwifiex_adapter *adapter =
+		container_of(work, struct mwifiex_adapter, devdump_work.work);
 
 	mwifiex_upload_device_dump(adapter);
 }
@@ -309,7 +310,7 @@  static void mwifiex_init_adapter(struct mwifiex_adapter *adapter)
 	adapter->active_scan_triggered = false;
 	timer_setup(&adapter->wakeup_timer, wakeup_timer_fn, 0);
 	adapter->devdump_len = 0;
-	timer_setup(&adapter->devdump_timer, fw_dump_timer_fn, 0);
+	INIT_DELAYED_WORK(&adapter->devdump_work, fw_dump_work);
 }
 
 /*
@@ -388,7 +389,7 @@  static void
 mwifiex_adapter_cleanup(struct mwifiex_adapter *adapter)
 {
 	del_timer(&adapter->wakeup_timer);
-	del_timer_sync(&adapter->devdump_timer);
+	cancel_delayed_work_sync(&adapter->devdump_work);
 	mwifiex_cancel_all_pending_cmd(adapter);
 	wake_up_interruptible(&adapter->cmd_wait_q.wait);
 	wake_up_interruptible(&adapter->hs_activate_wait_q);
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 87729d251fe..63f861e6b28 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -37,6 +37,7 @@ 
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/of_irq.h>
+#include <linux/workqueue.h>
 
 #include "decl.h"
 #include "ioctl.h"
@@ -1043,7 +1044,7 @@  struct mwifiex_adapter {
 	/* Device dump data/length */
 	void *devdump_data;
 	int devdump_len;
-	struct timer_list devdump_timer;
+	struct delayed_work devdump_work;
 
 	bool ignore_btcoex_events;
 };
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c
index b95e90a7d12..e80e372cce8 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_event.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c
@@ -611,8 +611,8 @@  mwifiex_fw_dump_info_event(struct mwifiex_private *priv,
 		 * transmission event get lost, in this cornel case,
 		 * user would still get partial of the dump.
 		 */
-		mod_timer(&adapter->devdump_timer,
-			  jiffies + msecs_to_jiffies(MWIFIEX_TIMER_10S));
+		schedule_delayed_work(&adapter->devdump_work,
+				      msecs_to_jiffies(MWIFIEX_TIMER_10S));
 	}
 
 	/* Overflow check */
@@ -631,7 +631,7 @@  mwifiex_fw_dump_info_event(struct mwifiex_private *priv,
 	return;
 
 upload_dump:
-	del_timer_sync(&adapter->devdump_timer);
+	cancel_delayed_work_sync(&adapter->devdump_work);
 	mwifiex_upload_device_dump(adapter);
 }