diff mbox series

[rdma-next,v2,6/7] RDMA/nldev: Add support for RDMA monitoring

Message ID 20240830073130.29982-7-michaelgur@nvidia.com (mailing list archive)
State Superseded
Headers show
Series Support RDMA events monitoring through | expand

Commit Message

Michael Guralnik Aug. 30, 2024, 7:31 a.m. UTC
From: Chiara Meiohas <cmeiohas@nvidia.com>

Introduce a new netlink command to allow rdma event monitoring.
The rdma events supported now are IB device
registration/unregistration and net device attachment/detachment.

Example output of rdma monitor and the commands which trigger
the events:

$ rdma monitor
$ rmmod mlx5_ib
[UNREGISTER]    dev 3
[UNREGISTER]    dev 0

$ modprobe mlx5_ib
[REGISTER]      dev 4
[NETDEV_ATTACH] dev 4 port 1 netdev 4
[REGISTER]      dev 5
[NETDEV_ATTACH] dev 5 port 1 netdev 5

$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
[UNREGISTER]    dev 4
[REGISTER]      dev 6
[NETDEV_ATTACH] dev 6 port 6 netdev 4

$ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
[NETDEV_ATTACH] dev 6 port 2 netdev 7
[NETDEV_ATTACH] dev 6 port 3 netdev 8
[NETDEV_ATTACH] dev 6 port 4 netdev 9
[NETDEV_ATTACH] dev 6 port 5 netdev 10
[REGISTER]      dev 7
[NETDEV_ATTACH] dev 7 port 1 netdev 11
[REGISTER]      dev 8
[NETDEV_ATTACH] dev 8 port 1 netdev 12
[REGISTER]      dev 9
[NETDEV_ATTACH] dev 9 port 1 netdev 13
[REGISTER]      dev 10
[NETDEV_ATTACH] dev 10 port 1 netdev 14

$ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
[UNREGISTER]    dev 7
[UNREGISTER]    dev 8
[UNREGISTER]    dev 9
[UNREGISTER]    dev 10
[NETDEV_DETACH] dev 6 port 2
[NETDEV_DETACH] dev 6 port 3
[NETDEV_DETACH] dev 6 port 4
[NETDEV_DETACH] dev 6 port 5

Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/core/device.c  |  38 ++++++++++
 drivers/infiniband/core/netlink.c |   1 +
 drivers/infiniband/core/nldev.c   | 118 ++++++++++++++++++++++++++++++
 include/rdma/rdma_netlink.h       |  12 +++
 include/uapi/rdma/rdma_netlink.h  |  15 ++++
 5 files changed, 184 insertions(+)

Comments

Leon Romanovsky Sept. 2, 2024, 12:29 p.m. UTC | #1
On Fri, Aug 30, 2024 at 10:31:29AM +0300, Michael Guralnik wrote:
> From: Chiara Meiohas <cmeiohas@nvidia.com>
> 
> Introduce a new netlink command to allow rdma event monitoring.
> The rdma events supported now are IB device
> registration/unregistration and net device attachment/detachment.
> 
> Example output of rdma monitor and the commands which trigger
> the events:
> 
> $ rdma monitor
> $ rmmod mlx5_ib
> [UNREGISTER]    dev 3
> [UNREGISTER]    dev 0
> 
> $ modprobe mlx5_ib
> [REGISTER]      dev 4
> [NETDEV_ATTACH] dev 4 port 1 netdev 4
> [REGISTER]      dev 5
> [NETDEV_ATTACH] dev 5 port 1 netdev 5
> 
> $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
> [UNREGISTER]    dev 4
> [REGISTER]      dev 6
> [NETDEV_ATTACH] dev 6 port 6 netdev 4
> 
> $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs
> [NETDEV_ATTACH] dev 6 port 2 netdev 7
> [NETDEV_ATTACH] dev 6 port 3 netdev 8
> [NETDEV_ATTACH] dev 6 port 4 netdev 9
> [NETDEV_ATTACH] dev 6 port 5 netdev 10
> [REGISTER]      dev 7
> [NETDEV_ATTACH] dev 7 port 1 netdev 11
> [REGISTER]      dev 8
> [NETDEV_ATTACH] dev 8 port 1 netdev 12
> [REGISTER]      dev 9
> [NETDEV_ATTACH] dev 9 port 1 netdev 13
> [REGISTER]      dev 10
> [NETDEV_ATTACH] dev 10 port 1 netdev 14
> 
> $ echo 0 > /sys/class/net/eth2/device/sriov_numvfs
> [UNREGISTER]    dev 7
> [UNREGISTER]    dev 8
> [UNREGISTER]    dev 9
> [UNREGISTER]    dev 10
> [NETDEV_DETACH] dev 6 port 2
> [NETDEV_DETACH] dev 6 port 3
> [NETDEV_DETACH] dev 6 port 4
> [NETDEV_DETACH] dev 6 port 5
> 
> Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
> Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
> Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/core/device.c  |  38 ++++++++++
>  drivers/infiniband/core/netlink.c |   1 +
>  drivers/infiniband/core/nldev.c   | 118 ++++++++++++++++++++++++++++++
>  include/rdma/rdma_netlink.h       |  12 +++
>  include/uapi/rdma/rdma_netlink.h  |  15 ++++
>  5 files changed, 184 insertions(+)

This patch breaks RXE and the following splat can be reproduced with
"sudo rdma link add rxe1 type rxe netdev eth1" command:

[   16.871877][  T344] rdma_rxe: loaded
[   17.057211][  T343] infiniband rxe1: set active
[   17.057493][  T343] infiniband rxe1: added eth1
[   17.080757][  T343]
[   17.080891][  T343] ======================================================
[   17.081170][  T343] WARNING: possible circular locking dependency detected
[   17.081465][  T343] 6.11.0-rc5+ #2367 Not tainted
[   17.081675][  T343] ------------------------------------------------------
[   17.081886][  T343] rdma/343 is trying to acquire lock:
[   17.082048][  T343] ffff88800ef6d188 (&rxe->usdev_lock){+.+.}-{3:3}, at: rxe_query_port+0x41/0x170 [rdma_rxe]
[   17.082385][  T343]
[   17.082385][  T343] but task is already holding lock:
[   17.082628][  T343] ffff88800ef6ce90 (&device->compat_devs_mutex){+.+.}-{3:3}, at: add_one_compat_dev+0xe4/0x6e0 [ib_core]
[   17.083002][  T343]
[   17.083002][  T343] which lock already depends on the new lock.
[   17.083002][  T343]
[   17.083302][  T343]
[   17.083302][  T343] the existing dependency chain (in reverse order) is:
[   17.083580][  T343]
[   17.083580][  T343] -> #3 (&device->compat_devs_mutex){+.+.}-{3:3}:
[   17.083866][  T343]        __mutex_lock+0x14a/0x1940
[   17.084038][  T343]        ib_device_rename+0x110/0x3b0 [ib_core]
[   17.084274][  T343]        nldev_set_doit+0x2ef/0x3d0 [ib_core]
[   17.084500][  T343]        rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core]
[   17.084715][  T343]        rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core]
[   17.084981][  T343]        netlink_unicast+0x438/0x730
[   17.085194][  T343]        netlink_sendmsg+0x72a/0xbc0
[   17.085438][  T343]        __sock_sendmsg+0xc5/0x190
[   17.085668][  T343]        ____sys_sendmsg+0x52e/0x6a0
[   17.085901][  T343]        ___sys_sendmsg+0xdf/0x150
[   17.086128][  T343]        __sys_sendmsg+0x161/0x1d0
[   17.086354][  T343]        do_syscall_64+0x6d/0x140
[   17.086584][  T343]        entry_SYSCALL_64_after_hwframe+0x4b/0x53
[   17.086872][  T343]
[   17.086872][  T343] -> #2 (devices_rwsem){++++}-{3:3}:
[   17.087171][  T343]        down_read+0x96/0x450
[   17.087322][  T343]        ib_device_set_netdev.part.0+0x36b/0x640 [ib_core]
[   17.087554][  T343]        ib_device_set_netdev+0xb7/0xe0 [ib_core]
[   17.087749][  T343]        mlx5_netdev_event+0x428/0x990 [mlx5_ib]
[   17.087945][  T343]        call_netdevice_register_net_notifiers+0xdb/0x290
[   17.088113][  T343]        __register_netdevice_notifier_net+0x4b/0x70
[   17.088277][  T343]        register_netdevice_notifier_dev_net+0x53/0x160
[   17.088448][  T343]        mlx5e_mdev_notifier_event+0x8a/0xf0 [mlx5_ib]
[   17.088630][  T343]        notifier_call_chain+0x96/0x270
[   17.088773][  T343]        blocking_notifier_call_chain+0x60/0x80
[   17.088970][  T343]        mlx5_core_uplink_netdev_event_replay+0x4d/0x60 [mlx5_core]
[   17.089289][  T343]        mlx5_ib_roce_init+0x1f5/0x720 [mlx5_ib]
[   17.089509][  T343]        __mlx5_ib_add+0x6b/0x140 [mlx5_ib]
[   17.089727][  T343]        mlx5r_probe+0x24f/0x5d0 [mlx5_ib]
[   17.089951][  T343]        auxiliary_bus_probe+0x9d/0xe0
[   17.090112][  T343]        really_probe+0x1cf/0x8b0
[   17.090278][  T343]        __driver_probe_device+0x190/0x370
[   17.090464][  T343]        driver_probe_device+0x4a/0x120
[   17.090614][  T343]        __driver_attach+0x195/0x470
[   17.090761][  T343]        bus_for_each_dev+0xf0/0x170
[   17.090928][  T343]        bus_add_driver+0x21d/0x4d0
[   17.091080][  T343]        driver_register+0x1a1/0x350
[   17.091238][  T343]        __auxiliary_driver_register+0x14e/0x230
[   17.091440][  T343]        cm_dev_release+0xb7/0x170 [ib_cm]
[   17.091657][  T343]        do_one_initcall+0xbf/0x390
[   17.091830][  T343]        do_init_module+0x22e/0x710
[   17.091987][  T343]        load_module+0x4e40/0x65a0
[   17.092142][  T343]        init_module_from_file+0xcf/0x120
[   17.092305][  T343]        idempotent_init_module+0x22d/0x720
[   17.092510][  T343]        __x64_sys_finit_module+0xc1/0x130
[   17.092703][  T343]        do_syscall_64+0x6d/0x140
[   17.092876][  T343]        entry_SYSCALL_64_after_hwframe+0x4b/0x53
[   17.093091][  T343]
[   17.093091][  T343] -> #1 (rtnl_mutex){+.+.}-{3:3}:
[   17.093338][  T343]        __mutex_lock+0x14a/0x1940
[   17.093506][  T343]        ib_get_eth_speed+0xe8/0x9c0 [ib_core]
[   17.093735][  T343]        rxe_query_port+0x56/0x170 [rdma_rxe]
[   17.093950][  T343]        ib_query_port+0x338/0x670 [ib_core]
[   17.094178][  T343]        rxe_port_immutable+0x10f/0x230 [rdma_rxe]
[   17.094388][  T343]        ib_register_device+0x3a2/0xac0 [ib_core]
[   17.094618][  T343]        rxe_register_device+0x2cd/0x3a0 [rdma_rxe]
[   17.094842][  T343]        rxe_net_add+0xaf/0x100 [rdma_rxe]
[   17.095065][  T343]        rxe_newlink+0x4f/0xe0 [rdma_rxe]
[   17.095231][  T343]        nldev_newlink+0x29d/0x4b0 [ib_core]
[   17.095468][  T343]        rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core]
[   17.095712][  T343]        rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core]
[   17.096026][  T343]        netlink_unicast+0x438/0x730
[   17.096206][  T343]        netlink_sendmsg+0x72a/0xbc0
[   17.096379][  T343]        __sock_sendmsg+0xc5/0x190
[   17.096570][  T343]        __sys_sendto+0x25d/0x310
[   17.096742][  T343]        __x64_sys_sendto+0xdc/0x1b0
[   17.096928][  T343]        do_syscall_64+0x6d/0x140
[   17.097138][  T343]        entry_SYSCALL_64_after_hwframe+0x4b/0x53                                                 
[   17.097362][  T343]                                     
[   17.097362][  T343] -> #0 (&rxe->usdev_lock){+.+.}-{3:3}:                                                           
[   17.097630][  T343]        __lock_acquire+0x2be0/0x6490
[   17.097803][  T343]        lock_acquire+0x1b2/0x4e0
[   17.097974][  T343]        __mutex_lock+0x14a/0x1940
[   17.098164][  T343]        rxe_query_port+0x41/0x170 [rdma_rxe]                                                     
[   17.098397][  T343]        ib_query_port+0x338/0x670 [ib_core]                                                      
[   17.098639][  T343]        ib_setup_port_attrs+0x194/0x4b0 [ib_core]                                                
[   17.098878][  T343]        add_one_compat_dev+0x450/0x6e0 [ib_core]                                                 
[   17.099122][  T343]        enable_device_and_get+0x2ae/0x330 [ib_core]                                              
[   17.099363][  T343]        ib_register_device+0x6c0/0xac0 [ib_core]                                                 
[   17.099598][  T343]        rxe_register_device+0x2cd/0x3a0 [rdma_rxe]                                               
[   17.099824][  T343]        rxe_net_add+0xaf/0x100 [rdma_rxe]                                                        
[   17.100049][  T343]        rxe_newlink+0x4f/0xe0 [rdma_rxe]                                                         
[   17.100272][  T343]        nldev_newlink+0x29d/0x4b0 [ib_core]                                                      
[   17.100496][  T343]        rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core]                                                    
[   17.100755][  T343]        rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core]                                 
[   17.101023][  T343]        netlink_unicast+0x438/0x730
[   17.101204][  T343]        netlink_sendmsg+0x72a/0xbc0
[   17.101354][  T343]        __sock_sendmsg+0xc5/0x190
[   17.101511][  T343]        __sys_sendto+0x25d/0x310
[   17.101660][  T343]        __x64_sys_sendto+0xdc/0x1b0
[   17.101819][  T343]        do_syscall_64+0x6d/0x140
[   17.101969][  T343]        entry_SYSCALL_64_after_hwframe+0x4b/0x53                                                 
[   17.102149][  T343]                                     
[   17.102149][  T343] other info that might help us debug this:                                                       
[   17.102149][  T343]                                     
[   17.102462][  T343] Chain exists of:
[   17.102462][  T343]   &rxe->usdev_lock --> devices_rwsem --> &device->compat_devs_mutex                             
[   17.102462][  T343]                                     
[   17.102835][  T343]  Possible unsafe locking scenario:
[   17.102835][  T343]                                     
[   17.103073][  T343]        CPU0                    CPU1
[   17.103219][  T343]        ----                    ----
[   17.103364][  T343]   lock(&device->compat_devs_mutex);
[   17.103514][  T343]                                lock(devices_rwsem);                                             
[   17.103692][  T343]                                lock(&device->compat_devs_mutex);                                
[   17.103909][  T343]   lock(&rxe->usdev_lock);
[   17.104057][  T343]                                     
[   17.104057][  T343]  *** DEADLOCK ***
[   17.104057][  T343]                                     
[   17.104290][  T343] 5 locks held by rdma/343:
[   17.104450][  T343]  #0: ffffffffa06fcff8 (&rdma_nl_types[idx].sem){.+.+}-{3:3}, at: rdma_nl_rcv_msg+0x125/0x4f0 [ib_core]
[   17.104767][  T343]  #1: ffffffffa06f4f50 (link_ops_rwsem){++++}-{3:3}, at: nldev_newlink+0x37f/0x4b0 [ib_core]     
[   17.105109][  T343]  #2: ffffffffa06e79d0 (devices_rwsem){++++}-{3:3}, at: enable_device_and_get+0xf9/0x330 [ib_core]
[   17.105472][  T343]  #3: ffffffffa06e7750 (rdma_nets_rwsem){.+.+}-{3:3}, at: enable_device_and_get+0x250/0x330 [ib_core]
[   17.105874][  T343]  #4: ffff88800ef6ce90 (&device->compat_devs_mutex){+.+.}-{3:3}, at: add_one_compat_dev+0xe4/0x6e0 [ib_core]
[   17.106365][  T343]                                     
[   17.106365][  T343] stack backtrace:
[   17.106586][  T343] CPU: 3 UID: 0 PID: 343 Comm: rdma Not tainted 6.11.0-rc5+ #2367                                 
[   17.106828][  T343] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[   17.107172][  T343] Call Trace:
[   17.107301][  T343]  <TASK>
[   17.107397][  T343]  dump_stack_lvl+0x57/0x80
[   17.107560][  T343]  check_noncircular+0x2f4/0x3d0
[   17.107736][  T343]  ? print_circular_bug+0x410/0x410
[   17.107893][  T343]  ? __fprop_add_percpu_max+0xb3/0x130
[   17.108043][  T343]  ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0                                                      
[   17.108219][  T343]  ? find_held_lock+0x2d/0x110
[   17.108369][  T343]  __lock_acquire+0x2be0/0x6490
[   17.108519][  T343]  ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0                                                      
[   17.108700][  T343]  ? lock_release+0x221/0x780
[   17.108857][  T343]  ? reacquire_held_locks+0x4a0/0x4a0
[   17.109013][  T343]  lock_acquire+0x1b2/0x4e0
[   17.109172][  T343]  ? rxe_query_port+0x41/0x170 [rdma_rxe]                                                         
[   17.109342][  T343]  ? __lock_acquire+0x6490/0x6490
[   17.109506][  T343]  ? kernfs_add_one+0x397/0x490
[   17.109671][  T343]  ? kernfs_new_node+0x133/0x240
[   17.109841][  T343]  ? lock_is_held_type+0x81/0xe0
[   17.110003][  T343]  __mutex_lock+0x14a/0x1940
[   17.110168][  T343]  ? rxe_query_port+0x41/0x170 [rdma_rxe]                                                         
[   17.110339][  T343]  ? rxe_query_port+0x41/0x170 [rdma_rxe]                                                         
[   17.110513][  T343]  ? lock_release+0x221/0x780
[   17.110679][  T343]  ? kfree+0x167/0x2e0
[   17.110814][  T343]  ? mutex_lock_io_nested+0x16e0/0x16e0                                                           
[   17.110978][  T343]  ? kobject_add_internal+0x292/0x920
[   17.111149][  T343]  ? kobject_add+0x117/0x180
[   17.111315][  T343]  ? kset_create_and_add+0x160/0x160
[   17.111484][  T343]  ? rxe_query_port+0x41/0x170 [rdma_rxe]                                                         
[   17.111658][  T343]  rxe_query_port+0x41/0x170 [rdma_rxe]                                                           
[   17.111836][  T343]  ib_query_port+0x338/0x670 [ib_core]
[   17.112036][  T343]  ib_setup_port_attrs+0x194/0x4b0 [ib_core]                                                      
[   17.112307][  T343]  ? ib_free_port_attrs+0x3c0/0x3c0 [ib_core]                                                     
[   17.112539][  T343]  ? __init_waitqueue_head+0xcb/0x150
[   17.112710][  T343]  add_one_compat_dev+0x450/0x6e0 [ib_core]                                                       
[   17.112950][  T343]  enable_device_and_get+0x2ae/0x330 [ib_core]                                                    
[   17.113185][  T343]  ? add_client_context+0x430/0x430 [ib_core]                                                     
[   17.113416][  T343]  ? rdma_counter_init+0x139/0x390 [ib_core]                                                      
[   17.113656][  T343]  ib_register_device+0x6c0/0xac0 [ib_core]                                                       
[   17.113894][  T343]  ? ib_device_get_netdev+0x3a0/0x3a0 [ib_core]                                                   
[   17.114124][  T343]  ? crypto_alg_mod_lookup+0x23b/0x3d0
[   17.114289][  T343]  ? crypto_alloc_tfm_node+0xd5/0x1e0
[   17.114455][  T343]  rxe_register_device+0x2cd/0x3a0 [rdma_rxe]                                                     
[   17.114667][  T343]  rxe_net_add+0xaf/0x100 [rdma_rxe]
[   17.114846][  T343]  rxe_newlink+0x4f/0xe0 [rdma_rxe]
[   17.115020][  T343]  nldev_newlink+0x29d/0x4b0 [ib_core]
[   17.115216][  T343]  ? nldev_port_get_dumpit+0x7a0/0x7a0 [ib_core]                                                  
[   17.115454][  T343]  ? __lock_acquire+0x6490/0x6490
[   17.115621][  T343]  ? lock_release+0x221/0x780
[   17.115795][  T343]  ? lock_chain_count+0x20/0x20
[   17.115965][  T343]  ? security_capable+0x68/0xa0
[   17.116130][  T343]  rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core]                                                          
[   17.116321][  T343]  ? rdma_nl_multicast+0xf0/0xf0 [ib_core]                                                        
[   17.116552][  T343]  ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0                                                      
[   17.116752][  T343]  ? lock_acquire+0x1b2/0x4e0
[   17.116921][  T343]  ? find_held_lock+0x2d/0x110
[   17.117098][  T343]  ? __netlink_lookup+0x339/0x670
[   17.117277][  T343]  rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core]                                       
[   17.117560][  T343]  ? rdma_nl_rcv_msg+0x4f0/0x4f0 [ib_core]                                                        
[   17.117809][  T343]  ? lock_release+0x221/0x780
[   17.117970][  T343]  ? netlink_deliver_tap+0xcd/0xa20
[   17.118127][  T343]  ? netlink_deliver_tap+0x152/0xa20
[   17.118283][  T343]  netlink_unicast+0x438/0x730
[   17.118434][  T343]  ? netlink_attachskb+0x710/0x710
[   17.118602][  T343]  ? lock_acquire+0x1b2/0x4e0
[   17.118772][  T343]  netlink_sendmsg+0x72a/0xbc0
[   17.118933][  T343]  ? netlink_unicast+0x730/0x730
[   17.119099][  T343]  ? reacquire_held_locks+0x4a0/0x4a0
[   17.119270][  T343]  ? __might_fault+0xae/0x120
[   17.119423][  T343]  ? netlink_unicast+0x730/0x730
[   17.119589][  T343]  __sock_sendmsg+0xc5/0x190
[   17.119757][  T343]  ? _copy_from_user+0x56/0xa0
[   17.119921][  T343]  __sys_sendto+0x25d/0x310
[   17.120085][  T343]  ? __x64_sys_getpeername+0xb0/0xb0
[   17.120273][  T343]  ? move_addr_to_user+0x54/0x80
[   17.120446][  T343]  ? __sys_getsockname+0x19d/0x230
[   17.120617][  T343]  ? fd_install+0x1c4/0x510
[   17.120798][  T343]  ? __sys_setsockopt+0xdc/0x160
[   17.120969][  T343]  __x64_sys_sendto+0xdc/0x1b0
[   17.121154][  T343]  ? lockdep_hardirqs_on_prepare+0x268/0x3e0                                                      
[   17.121358][  T343]  do_syscall_64+0x6d/0x140
[   17.121524][  T343]  entry_SYSCALL_64_after_hwframe+0x4b/0x53                                                       
[   17.121723][  T343] RIP: 0033:0x7f2aadc078b7
[   17.121895][  T343] Code: c7 c0 ff ff ff ff eb be 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d 95 17 0d 00 00 41 89 ca 74 10 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 69 c3 55 48 89 e5 53 48 83 ec 38 44 89 4d d0
[   17.122385][  T343] RSP: 002b:00007ffccd9b5638 EFLAGS: 00000202 ORIG_RAX: 000000000000002c                          
[   17.122651][  T343] RAX: ffffffffffffffda RBX: 00005625dd51d320 RCX: 00007f2aadc078b7                               
[   17.122908][  T343] RDX: 0000000000000030 RSI: 00005625dd51c2a0 RDI: 0000000000000004                               
[   17.123158][  T343] RBP: 00007ffccd9b5670 R08: 00007f2aadcec200 R09: 000000000000000c                               
[   17.123401][  T343] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffccd9b58b0                               
[   17.123642][  T343] R13: 00007ffccd9b5644 R14: 0000000066d5ad8b R15: 0000000000000000                               
[   17.123898][  T343]  </TASK>
diff mbox series

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b2fc5a13577c..2113eb7c7573 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1351,6 +1351,30 @@  static void prevent_dealloc_device(struct ib_device *ib_dev)
 {
 }
 
+/*
+ * Report a freshly registered IB device to RDMA monitor listeners.
+ *
+ * Emits one RDMA_REGISTER_EVENT for the device, followed by one
+ * RDMA_NETDEV_ATTACH_EVENT for each port that currently has a net device.
+ * The sequence is abandoned as soon as any notification fails.
+ */
+static void ib_device_notify_register(struct ib_device *device)
+{
+	u32 port_num;
+
+	if (rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT))
+		return;
+
+	rdma_for_each_port(device, port_num) {
+		struct net_device *ndev;
+		int err;
+
+		ndev = ib_device_get_netdev(device, port_num);
+		if (ndev) {
+			err = rdma_nl_notify_event(device, port_num,
+						   RDMA_NETDEV_ATTACH_EVENT);
+			/* The lookup was only an existence check; drop it. */
+			dev_put(ndev);
+			if (err)
+				return;
+		}
+	}
+}
+
 /**
  * ib_register_device - Register an IB device with IB core
  * @device: Device to register
@@ -1449,6 +1473,8 @@  int ib_register_device(struct ib_device *device, const char *name,
 	dev_set_uevent_suppress(&device->dev, false);
 	/* Mark for userspace that device is ready */
 	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+	ib_device_notify_register(device);
 	ib_device_put(device);
 
 	return 0;
@@ -1491,6 +1517,7 @@  static void __ib_unregister_device(struct ib_device *ib_dev)
 		goto out;
 
 	disable_device(ib_dev);
+	rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
 
 	/* Expedite removing unregistered pointers from the hash table */
 	free_netdevs(ib_dev);
@@ -2159,6 +2186,7 @@  static void add_ndev_hash(struct ib_port_data *pdata)
 int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 			 u32 port)
 {
+	enum rdma_nl_notify_event_type etype;
 	struct net_device *old_ndev;
 	struct ib_port_data *pdata;
 	unsigned long flags;
@@ -2190,6 +2218,16 @@  int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 
 	add_ndev_hash(pdata);
+
+	down_read(&devices_rwsem);
+	if (xa_get_mark(&devices, ib_dev->index, DEVICE_REGISTERED) &&
+	    xa_load(&devices, ib_dev->index) == ib_dev) {
+		etype = ndev ?
+			RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
+		rdma_nl_notify_event(ib_dev, port, etype);
+	}
+	up_read(&devices_rwsem);
+
 	return 0;
 }
 EXPORT_SYMBOL(ib_device_set_netdev);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index ae2db0c70788..def14c54b648 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -311,6 +311,7 @@  int rdma_nl_net_init(struct rdma_dev_net *rnet)
 	struct net *net = read_pnet(&rnet->net);
 	struct netlink_kernel_cfg cfg = {
 		.input	= rdma_nl_rcv,
+		.flags = NL_CFG_F_NONROOT_RECV,
 	};
 	struct sock *nls;
 
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 4d4a1f90e484..b0354bb8ba0d 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -170,6 +170,7 @@  static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_DEV_TYPE]		= { .type = NLA_U8 },
 	[RDMA_NLDEV_ATTR_PARENT_NAME]		= { .type = NLA_NUL_STRING },
 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
+	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2722,6 +2723,123 @@  static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	},
 };
 
+/*
+ * Add the attributes describing a port/netdev association to @msg:
+ * device index, port index and, when the port has a net device, that
+ * net device's ifindex.
+ *
+ * If the port's net device belongs to a namespace other than @net, no
+ * attributes are added and 0 is returned.
+ * NOTE(review): in that case the caller still sends the message with
+ * only the event type set - confirm this is intended.
+ *
+ * Returns 0 on success or the error returned by nla_put_u32().
+ */
+static int fill_mon_netdev_association(struct sk_buff *msg,
+				       struct ib_device *device, u32 port,
+				       const struct net *net)
+{
+	struct net_device *ndev;
+	int err = 0;
+
+	ndev = ib_device_get_netdev(device, port);
+	if (!ndev || net_eq(dev_net(ndev), net)) {
+		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX,
+				  device->index);
+		if (!err)
+			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX,
+					  port);
+		if (!err && ndev)
+			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX,
+					  ndev->ifindex);
+	}
+
+	/* Called unconditionally, as in every exit path: ndev may be NULL. */
+	dev_put(ndev);
+	return err;
+}
+
+/*
+ * Add the device index attribute used by register/unregister events.
+ * @net is accepted but not used here.
+ *
+ * Returns the result of nla_put_u32().
+ */
+static int fill_mon_register(struct sk_buff *msg, struct ib_device *device,
+			     const struct net *net)
+{
+	u32 dev_index = device->index;
+
+	return nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, dev_index);
+}
+
+/*
+ * Log a rate-limited warning that an RDMA monitor event could not be sent.
+ *
+ * @device:   IB device the failed event referred to
+ * @port_num: port the failed event referred to (printed for netdev events)
+ * @type:     the event that failed; unknown values are silently ignored
+ */
+static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
+				    enum rdma_nl_notify_event_type type)
+{
+	struct net_device *netdev;
+
+	switch (type) {
+	case RDMA_REGISTER_EVENT:
+		dev_warn_ratelimited(&device->dev,
+				     "Failed to send RDMA monitor register device event\n");
+		break;
+	case RDMA_UNREGISTER_EVENT:
+		dev_warn_ratelimited(&device->dev,
+				     "Failed to send RDMA monitor unregister device event\n");
+		break;
+	case RDMA_NETDEV_ATTACH_EVENT:
+		/*
+		 * The lookup can return NULL (other callers check for it),
+		 * e.g. if the net device went away in the meantime, so the
+		 * ifindex is only printed when a net device is found.
+		 */
+		netdev = ib_device_get_netdev(device, port_num);
+		if (netdev) {
+			dev_warn_ratelimited(&device->dev,
+					     "Failed to send RDMA monitor netdev attach event: port %u netdev %d\n",
+					     port_num, netdev->ifindex);
+			dev_put(netdev);
+		} else {
+			dev_warn_ratelimited(&device->dev,
+					     "Failed to send RDMA monitor netdev attach event: port %u\n",
+					     port_num);
+		}
+		break;
+	case RDMA_NETDEV_DETACH_EVENT:
+		dev_warn_ratelimited(&device->dev,
+				     "Failed to send RDMA monitor netdev detach event: port %u\n",
+				     port_num);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * rdma_nl_notify_event - build and multicast an RDMA monitor event
+ * @device: IB device the event refers to
+ * @port_num: port the event refers to; callers pass 0 for device events
+ * @type: which event to report
+ *
+ * Builds an RDMA_NLDEV_CMD_MONITOR message carrying the device (and, for
+ * netdev events, port/netdev) attributes plus RDMA_NLDEV_ATTR_EVENT_TYPE,
+ * and multicasts it to RDMA_NL_GROUP_NOTIFY in the device's net namespace.
+ * Once the message buffer has been allocated, any failure is reported
+ * with a rate-limited warning.
+ *
+ * Return: 0 on success or when the multicast reports -ESRCH (presumably
+ * no listeners), otherwise a negative error code.
+ */
+int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
+			  enum rdma_nl_notify_event_type type)
+{
+	struct sk_buff *skb;
+	struct net *net;
+	int ret = 0;
+	void *nlh;
+
+	net = read_pnet(&device->coredev.rdma_net);
+	if (!net)
+		return -EINVAL;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	nlh = nlmsg_put(skb, 0, 0,
+			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
+			0, 0);
+	/* nlmsg_put() can fail; never hand a NULL header to nlmsg_end(). */
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_free;
+	}
+
+	switch (type) {
+	case RDMA_REGISTER_EVENT:
+	case RDMA_UNREGISTER_EVENT:
+		ret = fill_mon_register(skb, device, net);
+		if (ret)
+			goto err_free;
+		break;
+	case RDMA_NETDEV_ATTACH_EVENT:
+	case RDMA_NETDEV_DETACH_EVENT:
+		ret = fill_mon_netdev_association(skb, device,
+						  port_num, net);
+		if (ret)
+			goto err_free;
+		break;
+	default:
+		ret = -EINVAL;
+		goto err_free;
+	}
+
+	ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
+	if (ret)
+		goto err_free;
+
+	nlmsg_end(skb, nlh);
+	ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
+	if (ret && ret != -ESRCH) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		goto err_free;
+	}
+	return 0;
+
+err_free:
+	rdma_nl_notify_err_msg(device, port_num, type);
+	/* skb is NULL here when the multicast path already consumed it. */
+	nlmsg_free(skb);
+	return ret;
+}
+
 void __init nldev_init(void)
 {
 	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c2a79aeee113..326deaf56d5d 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -6,6 +6,8 @@ 
 #include <linux/netlink.h>
 #include <uapi/rdma/rdma_netlink.h>
 
+struct ib_device;
+
 enum {
 	RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
 	RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
@@ -110,6 +112,16 @@  int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
  */
 bool rdma_nl_chk_listeners(unsigned int group);
 
+/**
+ * rdma_nl_notify_event - Prepare and send an event message
+ * @ib: the IB device which triggered the event
+ * @port_num: the port number which triggered the event - 0 if unused
+ * @type: the event type
+ *
+ * Return: 0 on success or a negative error code
+ */
+int rdma_nl_notify_event(struct ib_device *ib, u32 port_num,
+			 enum rdma_nl_notify_event_type type);
+
 struct rdma_link_ops {
 	struct list_head list;
 	const char *type;
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 2f37568f5556..5f9636d26050 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -15,6 +15,7 @@  enum {
 enum {
 	RDMA_NL_GROUP_IWPM = 2,
 	RDMA_NL_GROUP_LS,
+	RDMA_NL_GROUP_NOTIFY,
 	RDMA_NL_NUM_GROUPS
 };
 
@@ -305,6 +306,8 @@  enum rdma_nldev_command {
 
 	RDMA_NLDEV_CMD_DELDEV,
 
+	RDMA_NLDEV_CMD_MONITOR,
+
 	RDMA_NLDEV_NUM_OPS
 };
 
@@ -574,6 +577,8 @@  enum rdma_nldev_attr {
 
 	RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,	/* u8 */
 
+	RDMA_NLDEV_ATTR_EVENT_TYPE,		/* u8 */
+
 	/*
 	 * Always the end
 	 */
@@ -624,4 +629,14 @@  enum rdma_nl_name_assign_type {
 	RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */
 };
 
+/*
+ * Supported rdma monitoring event types.
+ * Carried in the RDMA_NLDEV_ATTR_EVENT_TYPE (u8) attribute.
+ */
+enum rdma_nl_notify_event_type {
+	RDMA_REGISTER_EVENT,		/* IB device was registered */
+	RDMA_UNREGISTER_EVENT,		/* IB device is being unregistered */
+	RDMA_NETDEV_ATTACH_EVENT,	/* net device set on an IB port */
+	RDMA_NETDEV_DETACH_EVENT,	/* net device cleared from an IB port */
+};
+
+
 #endif /* _UAPI_RDMA_NETLINK_H */