diff mbox series

[net,v1] mlxbf_gige: open() should call request_irq() after NAPI init

Message ID 20240315145609.23950-1-davthompson@nvidia.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net,v1] mlxbf_gige: open() should call request_irq() after NAPI init | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 939 this patch: 939
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 6 of 7 maintainers
netdev/build_clang success Errors and warnings before: 956 this patch: 956
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 956 this patch: 956
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 51 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-03-15--18-00 (tests: 905)

Commit Message

David Thompson March 15, 2024, 2:56 p.m. UTC
This patch fixes an exception that occurs during open()
when kdump is enabled.  The sequence to reproduce the
exception is as follows:
a) enable kdump
b) trigger kdump via "echo c > /proc/sysrq-trigger"
c) kdump kernel executes
d) kdump kernel loads mlxbf_gige module
e) the mlxbf_gige module runs its open() as the
   the "oob_net0" interface is brought up
f) mlxbf_gige module will experience an exception
   during its open(), something like:

     Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
     Mem abort info:
       ESR = 0x0000000086000004
       EC = 0x21: IABT (current EL), IL = 32 bits
       SET = 0, FnV = 0
       EA = 0, S1PTW = 0
       FSC = 0x04: level 0 translation fault
     user pgtable: 4k pages, 48-bit VAs, pgdp=00000000e29a4000
     [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
     Internal error: Oops: 0000000086000004 [#1] SMP
     CPU: 0 PID: 812 Comm: NetworkManager Tainted: G           OE     5.15.0-1035-bluefield #37-Ubuntu
     Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.6.0.13024 Jan 19 2024
     pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
     pc : 0x0
     lr : __napi_poll+0x40/0x230
     sp : ffff800008003e00
     x29: ffff800008003e00 x28: 0000000000000000 x27: 00000000ffffffff
     x26: ffff000066027238 x25: ffff00007cedec00 x24: ffff800008003ec8
     x23: 000000000000012c x22: ffff800008003eb7 x21: 0000000000000000
     x20: 0000000000000001 x19: ffff000066027238 x18: 0000000000000000
     x17: ffff578fcb450000 x16: ffffa870b083c7c0 x15: 0000aaab010441d0
     x14: 0000000000000001 x13: 00726f7272655f65 x12: 6769675f6662786c
     x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa870b0842398
     x8 : 0000000000000004 x7 : fe5a48b9069706ea x6 : 17fdb11fc84ae0d2
     x5 : d94a82549d594f35 x4 : 0000000000000000 x3 : 0000000000400100
     x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000066027238
     Call trace:
      0x0
      net_rx_action+0x178/0x360
      __do_softirq+0x15c/0x428
      __irq_exit_rcu+0xac/0xec
      irq_exit+0x18/0x2c
      handle_domain_irq+0x6c/0xa0
      gic_handle_irq+0xec/0x1b0
      call_on_irq_stack+0x20/0x2c
      do_interrupt_handler+0x5c/0x70
      el1_interrupt+0x30/0x50
      el1h_64_irq_handler+0x18/0x2c
      el1h_64_irq+0x7c/0x80
      __setup_irq+0x4c0/0x950
      request_threaded_irq+0xf4/0x1bc
      mlxbf_gige_request_irqs+0x68/0x110 [mlxbf_gige]
      mlxbf_gige_open+0x5c/0x170 [mlxbf_gige]
      __dev_open+0x100/0x220
      __dev_change_flags+0x16c/0x1f0
      dev_change_flags+0x2c/0x70
      do_setlink+0x220/0xa40
      __rtnl_newlink+0x56c/0x8a0
      rtnl_newlink+0x58/0x84
      rtnetlink_rcv_msg+0x138/0x3c4
      netlink_rcv_skb+0x64/0x130
      rtnetlink_rcv+0x20/0x30
      netlink_unicast+0x2ec/0x360
      netlink_sendmsg+0x278/0x490
      __sock_sendmsg+0x5c/0x6c
      ____sys_sendmsg+0x290/0x2d4
      ___sys_sendmsg+0x84/0xd0
      __sys_sendmsg+0x70/0xd0
      __arm64_sys_sendmsg+0x2c/0x40
      invoke_syscall+0x78/0x100
      el0_svc_common.constprop.0+0x54/0x184
      do_el0_svc+0x30/0xac
      el0_svc+0x48/0x160
      el0t_64_sync_handler+0xa4/0x12c
      el0t_64_sync+0x1a4/0x1a8
     Code: bad PC value
     ---[ end trace 7d1c3f3bf9d81885 ]---
     Kernel panic - not syncing: Oops: Fatal exception in interrupt
     Kernel Offset: 0x2870a7a00000 from 0xffff800008000000
     PHYS_OFFSET: 0x80000000
     CPU features: 0x0,000005c1,a3332a5a
     Memory Limit: none
     ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]---

The exception happens because there is a pending RX interrupt before the
call to request_irq(RX IRQ) executes.  Then, the RX IRQ handler fires
immediately after this request_irq() completes. The RX IRQ handler runs
"napi_schedule()" before NAPI is fully initialized via "netif_napi_add()"
and "napi_enable()", both which happen later in the open() logic.

This patch fixes the issue by re-ordering the logic in mlxbf_gige open()
so that the request_irq() calls execute after NAPI is fully initialized.

Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
Signed-off-by: David Thompson <davthompson@nvidia.com>
Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>
---
 .../mellanox/mlxbf_gige/mlxbf_gige_main.c     | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

Comments

Jiri Pirko March 18, 2024, 11:40 a.m. UTC | #1
Fri, Mar 15, 2024 at 03:56:09PM CET, davthompson@nvidia.com wrote:
>This patch fixes an exception that occurs during open()

Don't talk about "this patch" in the patch description. Be imperative to
the codebase, tell it what to do. In both patch description and subject.
Much easier to follow then.


>when kdump is enabled.  The sequence to reproduce the
>exception is as follows:
>a) enable kdump
>b) trigger kdump via "echo c > /proc/sysrq-trigger"
>c) kdump kernel executes
>d) kdump kernel loads mlxbf_gige module
>e) the mlxbf_gige module runs its open() as the
>   the "oob_net0" interface is brought up
>f) mlxbf_gige module will experience an exception
>   during its open(), something like:
>
>     Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
>     Mem abort info:
>       ESR = 0x0000000086000004
>       EC = 0x21: IABT (current EL), IL = 32 bits
>       SET = 0, FnV = 0
>       EA = 0, S1PTW = 0
>       FSC = 0x04: level 0 translation fault
>     user pgtable: 4k pages, 48-bit VAs, pgdp=00000000e29a4000
>     [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
>     Internal error: Oops: 0000000086000004 [#1] SMP
>     CPU: 0 PID: 812 Comm: NetworkManager Tainted: G           OE     5.15.0-1035-bluefield #37-Ubuntu
>     Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.6.0.13024 Jan 19 2024
>     pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>     pc : 0x0
>     lr : __napi_poll+0x40/0x230
>     sp : ffff800008003e00
>     x29: ffff800008003e00 x28: 0000000000000000 x27: 00000000ffffffff
>     x26: ffff000066027238 x25: ffff00007cedec00 x24: ffff800008003ec8
>     x23: 000000000000012c x22: ffff800008003eb7 x21: 0000000000000000
>     x20: 0000000000000001 x19: ffff000066027238 x18: 0000000000000000
>     x17: ffff578fcb450000 x16: ffffa870b083c7c0 x15: 0000aaab010441d0
>     x14: 0000000000000001 x13: 00726f7272655f65 x12: 6769675f6662786c
>     x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa870b0842398
>     x8 : 0000000000000004 x7 : fe5a48b9069706ea x6 : 17fdb11fc84ae0d2
>     x5 : d94a82549d594f35 x4 : 0000000000000000 x3 : 0000000000400100
>     x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000066027238
>     Call trace:
>      0x0
>      net_rx_action+0x178/0x360
>      __do_softirq+0x15c/0x428
>      __irq_exit_rcu+0xac/0xec
>      irq_exit+0x18/0x2c
>      handle_domain_irq+0x6c/0xa0
>      gic_handle_irq+0xec/0x1b0
>      call_on_irq_stack+0x20/0x2c
>      do_interrupt_handler+0x5c/0x70
>      el1_interrupt+0x30/0x50
>      el1h_64_irq_handler+0x18/0x2c
>      el1h_64_irq+0x7c/0x80
>      __setup_irq+0x4c0/0x950
>      request_threaded_irq+0xf4/0x1bc
>      mlxbf_gige_request_irqs+0x68/0x110 [mlxbf_gige]
>      mlxbf_gige_open+0x5c/0x170 [mlxbf_gige]
>      __dev_open+0x100/0x220
>      __dev_change_flags+0x16c/0x1f0
>      dev_change_flags+0x2c/0x70
>      do_setlink+0x220/0xa40
>      __rtnl_newlink+0x56c/0x8a0
>      rtnl_newlink+0x58/0x84
>      rtnetlink_rcv_msg+0x138/0x3c4
>      netlink_rcv_skb+0x64/0x130
>      rtnetlink_rcv+0x20/0x30
>      netlink_unicast+0x2ec/0x360
>      netlink_sendmsg+0x278/0x490
>      __sock_sendmsg+0x5c/0x6c
>      ____sys_sendmsg+0x290/0x2d4
>      ___sys_sendmsg+0x84/0xd0
>      __sys_sendmsg+0x70/0xd0
>      __arm64_sys_sendmsg+0x2c/0x40
>      invoke_syscall+0x78/0x100
>      el0_svc_common.constprop.0+0x54/0x184
>      do_el0_svc+0x30/0xac
>      el0_svc+0x48/0x160
>      el0t_64_sync_handler+0xa4/0x12c
>      el0t_64_sync+0x1a4/0x1a8
>     Code: bad PC value
>     ---[ end trace 7d1c3f3bf9d81885 ]---
>     Kernel panic - not syncing: Oops: Fatal exception in interrupt
>     Kernel Offset: 0x2870a7a00000 from 0xffff800008000000
>     PHYS_OFFSET: 0x80000000
>     CPU features: 0x0,000005c1,a3332a5a
>     Memory Limit: none
>     ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]---
>
>The exception happens because there is a pending RX interrupt before the
>call to request_irq(RX IRQ) executes.  Then, the RX IRQ handler fires
>immediately after this request_irq() completes. The RX IRQ handler runs
>"napi_schedule()" before NAPI is fully initialized via "netif_napi_add()"
>and "napi_enable()", both which happen later in the open() logic.
>
>This patch fixes the issue by re-ordering the logic in mlxbf_gige open()
>so that the request_irq() calls execute after NAPI is fully initialized.

Same here.

>
>Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver")
>Signed-off-by: David Thompson <davthompson@nvidia.com>
>Reviewed-by: Asmaa Mnebhi <asmaa@nvidia.com>


The patch itself looks okay.


pw-bot: cr

>---
> .../mellanox/mlxbf_gige/mlxbf_gige_main.c     | 21 ++++++++++++-------
> 1 file changed, 14 insertions(+), 7 deletions(-)
>
>diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
>index 3d09fa54598f..77134ca92938 100644
>--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
>+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
>@@ -139,13 +139,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
> 	control |= MLXBF_GIGE_CONTROL_PORT_EN;
> 	writeq(control, priv->base + MLXBF_GIGE_CONTROL);
> 
>-	err = mlxbf_gige_request_irqs(priv);
>-	if (err)
>-		return err;
> 	mlxbf_gige_cache_stats(priv);
> 	err = mlxbf_gige_clean_port(priv);
> 	if (err)
>-		goto free_irqs;
>+		return err;
> 
> 	/* Clear driver's valid_polarity to match hardware,
> 	 * since the above call to clean_port() resets the
>@@ -157,7 +154,7 @@ static int mlxbf_gige_open(struct net_device *netdev)
> 
> 	err = mlxbf_gige_tx_init(priv);
> 	if (err)
>-		goto free_irqs;
>+		goto phy_deinit;
> 	err = mlxbf_gige_rx_init(priv);
> 	if (err)
> 		goto tx_deinit;
>@@ -166,6 +163,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
> 	napi_enable(&priv->napi);
> 	netif_start_queue(netdev);
> 
>+	err = mlxbf_gige_request_irqs(priv);
>+	if (err)
>+		goto napi_deinit;
>+
> 	/* Set bits in INT_EN that we care about */
> 	int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
> 		 MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
>@@ -182,11 +183,17 @@ static int mlxbf_gige_open(struct net_device *netdev)
> 
> 	return 0;
> 
>+napi_deinit:
>+	netif_stop_queue(netdev);
>+	napi_disable(&priv->napi);
>+	netif_napi_del(&priv->napi);
>+	mlxbf_gige_rx_deinit(priv);
>+
> tx_deinit:
> 	mlxbf_gige_tx_deinit(priv);
> 
>-free_irqs:
>-	mlxbf_gige_free_irqs(priv);
>+phy_deinit:
>+	phy_stop(phydev);
> 	return err;
> }
> 
>-- 
>2.30.1
>
>
Paolo Abeni March 19, 2024, 2:32 p.m. UTC | #2
On Fri, 2024-03-15 at 10:56 -0400, David Thompson wrote:
> This patch fixes an exception that occurs during open()
> when kdump is enabled.  The sequence to reproduce the
> exception is as follows:
> a) enable kdump
> b) trigger kdump via "echo c > /proc/sysrq-trigger"
> c) kdump kernel executes
> d) kdump kernel loads mlxbf_gige module
> e) the mlxbf_gige module runs its open() as the
>    the "oob_net0" interface is brought up
> f) mlxbf_gige module will experience an exception
>    during its open(), something like:
> 
>      Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
>      Mem abort info:
>        ESR = 0x0000000086000004
>        EC = 0x21: IABT (current EL), IL = 32 bits
>        SET = 0, FnV = 0
>        EA = 0, S1PTW = 0
>        FSC = 0x04: level 0 translation fault
>      user pgtable: 4k pages, 48-bit VAs, pgdp=00000000e29a4000
>      [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
>      Internal error: Oops: 0000000086000004 [#1] SMP
>      CPU: 0 PID: 812 Comm: NetworkManager Tainted: G           OE     5.15.0-1035-bluefield #37-Ubuntu
>      Hardware name: https://www.mellanox.com BlueField-3 SmartNIC Main Card/BlueField-3 SmartNIC Main Card, BIOS 4.6.0.13024 Jan 19 2024
>      pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>      pc : 0x0
>      lr : __napi_poll+0x40/0x230
>      sp : ffff800008003e00
>      x29: ffff800008003e00 x28: 0000000000000000 x27: 00000000ffffffff
>      x26: ffff000066027238 x25: ffff00007cedec00 x24: ffff800008003ec8
>      x23: 000000000000012c x22: ffff800008003eb7 x21: 0000000000000000
>      x20: 0000000000000001 x19: ffff000066027238 x18: 0000000000000000
>      x17: ffff578fcb450000 x16: ffffa870b083c7c0 x15: 0000aaab010441d0
>      x14: 0000000000000001 x13: 00726f7272655f65 x12: 6769675f6662786c
>      x11: 0000000000000000 x10: 0000000000000000 x9 : ffffa870b0842398
>      x8 : 0000000000000004 x7 : fe5a48b9069706ea x6 : 17fdb11fc84ae0d2
>      x5 : d94a82549d594f35 x4 : 0000000000000000 x3 : 0000000000400100
>      x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000066027238
>      Call trace:
>       0x0
>       net_rx_action+0x178/0x360
>       __do_softirq+0x15c/0x428
>       __irq_exit_rcu+0xac/0xec
>       irq_exit+0x18/0x2c
>       handle_domain_irq+0x6c/0xa0
>       gic_handle_irq+0xec/0x1b0
>       call_on_irq_stack+0x20/0x2c
>       do_interrupt_handler+0x5c/0x70
>       el1_interrupt+0x30/0x50
>       el1h_64_irq_handler+0x18/0x2c
>       el1h_64_irq+0x7c/0x80
>       __setup_irq+0x4c0/0x950
>       request_threaded_irq+0xf4/0x1bc
>       mlxbf_gige_request_irqs+0x68/0x110 [mlxbf_gige]
>       mlxbf_gige_open+0x5c/0x170 [mlxbf_gige]
>       __dev_open+0x100/0x220
>       __dev_change_flags+0x16c/0x1f0
>       dev_change_flags+0x2c/0x70
>       do_setlink+0x220/0xa40
>       __rtnl_newlink+0x56c/0x8a0
>       rtnl_newlink+0x58/0x84
>       rtnetlink_rcv_msg+0x138/0x3c4
>       netlink_rcv_skb+0x64/0x130
>       rtnetlink_rcv+0x20/0x30
>       netlink_unicast+0x2ec/0x360
>       netlink_sendmsg+0x278/0x490
>       __sock_sendmsg+0x5c/0x6c
>       ____sys_sendmsg+0x290/0x2d4
>       ___sys_sendmsg+0x84/0xd0
>       __sys_sendmsg+0x70/0xd0
>       __arm64_sys_sendmsg+0x2c/0x40
>       invoke_syscall+0x78/0x100
>       el0_svc_common.constprop.0+0x54/0x184
>       do_el0_svc+0x30/0xac
>       el0_svc+0x48/0x160
>       el0t_64_sync_handler+0xa4/0x12c
>       el0t_64_sync+0x1a4/0x1a8
>      Code: bad PC value
>      ---[ end trace 7d1c3f3bf9d81885 ]---
>      Kernel panic - not syncing: Oops: Fatal exception in interrupt
>      Kernel Offset: 0x2870a7a00000 from 0xffff800008000000
>      PHYS_OFFSET: 0x80000000
>      CPU features: 0x0,000005c1,a3332a5a
>      Memory Limit: none
>      ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]---
> 
> The exception happens because there is a pending RX interrupt before the
> call to request_irq(RX IRQ) executes.  Then, the RX IRQ handler fires
> immediately after this request_irq() completes. The RX IRQ handler runs
> "napi_schedule()" before NAPI is fully initialized via "netif_napi_add()"
> and "napi_enable()", both which happen later in the open() logic.
> 
> This patch fixes the issue by re-ordering the logic in mlxbf_gige open()
> so that the request_irq() calls execute after NAPI is fully initialized.

It also adds a missing phy_stop(phydev) in the error path. It's worth
mentioning in the changelog or possibly place into a separate patch.

Cheers,

Paolo
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 3d09fa54598f..77134ca92938 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -139,13 +139,10 @@  static int mlxbf_gige_open(struct net_device *netdev)
 	control |= MLXBF_GIGE_CONTROL_PORT_EN;
 	writeq(control, priv->base + MLXBF_GIGE_CONTROL);
 
-	err = mlxbf_gige_request_irqs(priv);
-	if (err)
-		return err;
 	mlxbf_gige_cache_stats(priv);
 	err = mlxbf_gige_clean_port(priv);
 	if (err)
-		goto free_irqs;
+		return err;
 
 	/* Clear driver's valid_polarity to match hardware,
 	 * since the above call to clean_port() resets the
@@ -157,7 +154,7 @@  static int mlxbf_gige_open(struct net_device *netdev)
 
 	err = mlxbf_gige_tx_init(priv);
 	if (err)
-		goto free_irqs;
+		goto phy_deinit;
 	err = mlxbf_gige_rx_init(priv);
 	if (err)
 		goto tx_deinit;
@@ -166,6 +163,10 @@  static int mlxbf_gige_open(struct net_device *netdev)
 	napi_enable(&priv->napi);
 	netif_start_queue(netdev);
 
+	err = mlxbf_gige_request_irqs(priv);
+	if (err)
+		goto napi_deinit;
+
 	/* Set bits in INT_EN that we care about */
 	int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
 		 MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
@@ -182,11 +183,17 @@  static int mlxbf_gige_open(struct net_device *netdev)
 
 	return 0;
 
+napi_deinit:
+	netif_stop_queue(netdev);
+	napi_disable(&priv->napi);
+	netif_napi_del(&priv->napi);
+	mlxbf_gige_rx_deinit(priv);
+
 tx_deinit:
 	mlxbf_gige_tx_deinit(priv);
 
-free_irqs:
-	mlxbf_gige_free_irqs(priv);
+phy_deinit:
+	phy_stop(phydev);
 	return err;
 }