diff mbox series

[for-next,v3,2/2] IB/usnic: fix deadlock

Message ID 1549662824-23439-2-git-send-email-pkaustub@cisco.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series [for-next,v3,1/2] IB/usnic: Fix locking when unregistering | expand

Commit Message

Parvi Kaustubhi (pkaustub) Feb. 8, 2019, 9:53 p.m. UTC
There is a dead lock in usnic ib_register and netdev_notify
path.

	usnic_ib_discover_pf()
	| mutex_lock(&usnic_ib_ibdev_list_lock);
	 | usnic_ib_device_add();
	  | ib_register_device()
	   | usnic_ib_query_port()
	    | mutex_lock(&us_ibdev->usdev_lock);
	     | ib_get_eth_speed()
	      | rtnl_lock()

order of lock: &usnic_ib_ibdev_list_lock -> usdev_lock -> rtnl_lock

	rtnl_lock()
	 | usnic_ib_netdevice_event()
	  | mutex_lock(&usnic_ib_ibdev_list_lock);

order of lock: rtnl_lock -> &usnic_ib_ibdev_list_lock

Solution is to use ib_device_get_by_netdev() to lookup ib_dev while
handling netdev/ inet events.

Signed-off-by: Parvi Kaustubhi <pkaustub@cisco.com>
Reviewed-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
Reviewed-by: Tanmay Inamdar <tinamdar@cisco.com>
---
Changelog:

v2->v3:
* Jason: drivers should not hold any locks while calling ib_unregister_device()
* Jason: use https://github.com/jgunthorpe/linux/commits/device_locking_cleanup

v1->v2:
* Have notifier blocks in usnic_ib_dev instead of using workqueue to defer
event handling.
---
 drivers/infiniband/hw/usnic/usnic_ib_main.c | 34 ++++++++++++++---------------
 1 file changed, 16 insertions(+), 18 deletions(-)

Comments

Jason Gunthorpe Feb. 8, 2019, 11:24 p.m. UTC | #1
On Fri, Feb 08, 2019 at 01:53:44PM -0800, Parvi Kaustubhi wrote:
> There is a dead lock in usnic ib_register and netdev_notify
> path.
> 
> 	usnic_ib_discover_pf()
> 	| mutex_lock(&usnic_ib_ibdev_list_lock);
> 	 | usnic_ib_device_add();
> 	  | ib_register_device()
> 	   | usnic_ib_query_port()
> 	    | mutex_lock(&us_ibdev->usdev_lock);
> 	     | ib_get_eth_speed()
> 	      | rtnl_lock()
> 
> order of lock: &usnic_ib_ibdev_list_lock -> usdev_lock -> rtnl_lock
> 
> 	rtnl_lock()
> 	 | usnic_ib_netdevice_event()
> 	  | mutex_lock(&usnic_ib_ibdev_list_lock);
> 
> order of lock: rtnl_lock -> &usnic_ib_ibdev_list_lock
> 
> Solution is to use ib_device_get_by_netdev() to lookup ib_dev while
> handling netdev/ inet events.
> 
> Signed-off-by: Parvi Kaustubhi <pkaustub@cisco.com>
> Reviewed-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
> Reviewed-by: Tanmay Inamdar <tinamdar@cisco.com>
> Changelog:
> 
> v2->v3:
> * Jason: drivers should not hold any locks while calling ib_unregister_device()
> * Jason: use https://github.com/jgunthorpe/linux/commits/device_locking_cleanup
> 
> v1->v2:
> * Have notifier blocks in usnic_ib_dev instead of using workqueue to defer
> event handling.
>  drivers/infiniband/hw/usnic/usnic_ib_main.c | 34 ++++++++++++++---------------
>  1 file changed, 16 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
> index 9529a08..53d6c12 100644
> +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
> @@ -216,18 +216,17 @@ static int usnic_ib_netdevice_event(struct notifier_block *notifier,
>  					unsigned long event, void *ptr)
>  {
>  	struct usnic_ib_dev *us_ibdev;
> +	struct ib_device *ibdev;
>  
>  	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
>  
> -	mutex_lock(&usnic_ib_ibdev_list_lock);
> -	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
> -		if (us_ibdev->netdev == netdev) {
> -			usnic_ib_handle_usdev_event(us_ibdev, event);
> -			break;
> -		}
> -	}
> -	mutex_unlock(&usnic_ib_ibdev_list_lock);
> -
> +	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
> +	if (!ibdev)
> +		goto exit;
> +	else
> +		us_ibdev = container_of(ibdev, struct usnic_ib_dev, ib_dev);

Just do 

if (!ibdev)
    return NOTIFY_DONE

instead of all this wonky control flow.

> +	usnic_ib_handle_usdev_event(us_ibdev, event);

All the ib_device_get_* calls need to be paired with an
ib_device_put()

Jason
kernel test robot Feb. 9, 2019, 2:24 a.m. UTC | #2
Hi Parvi,

I love your patch! Yet something to improve:

[auto build test ERROR on rdma/for-next]
[also build test ERROR on v5.0-rc4 next-20190208]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Parvi-Kaustubhi/IB-usnic-Fix-locking-when-unregistering/20190209-083958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
config: ia64-allyesconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 8.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=8.2.0 make.cross ARCH=ia64 

All error/warnings (new ones prefixed by >>):

   drivers/infiniband/hw/usnic/usnic_ib_main.c: In function 'usnic_ib_netdevice_event':
>> drivers/infiniband/hw/usnic/usnic_ib_main.c:223:10: error: implicit declaration of function 'ib_device_get_by_netdev'; did you mean 'ib_device_try_get'? [-Werror=implicit-function-declaration]
     ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
             ^~~~~~~~~~~~~~~~~~~~~~~
             ib_device_try_get
>> drivers/infiniband/hw/usnic/usnic_ib_main.c:223:8: warning: assignment to 'struct ib_device *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
           ^
   drivers/infiniband/hw/usnic/usnic_ib_main.c: In function 'usnic_ib_inetaddr_event':
   drivers/infiniband/hw/usnic/usnic_ib_main.c:286:8: warning: assignment to 'struct ib_device *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
           ^
   cc1: some warnings being treated as errors

vim +223 drivers/infiniband/hw/usnic/usnic_ib_main.c

   214	
   215	static int usnic_ib_netdevice_event(struct notifier_block *notifier,
   216						unsigned long event, void *ptr)
   217	{
   218		struct usnic_ib_dev *us_ibdev;
   219		struct ib_device *ibdev;
   220	
   221		struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
   222	
 > 223		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
   224		if (!ibdev)
   225			goto exit;
   226		else
   227			us_ibdev = container_of(ibdev, struct usnic_ib_dev, ib_dev);
   228		usnic_ib_handle_usdev_event(us_ibdev, event);
   229	exit:
   230		return NOTIFY_DONE;
   231	}
   232	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Feb. 9, 2019, 6:10 a.m. UTC | #3
Hi Parvi,

I love your patch! Perhaps something to improve:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on v5.0-rc4 next-20190208]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Parvi-Kaustubhi/IB-usnic-Fix-locking-when-unregistering/20190209-083958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
config: x86_64-fedora-25 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-2) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   drivers/infiniband/hw/usnic/usnic_ib_main.c: In function 'usnic_ib_netdevice_event':
   drivers/infiniband/hw/usnic/usnic_ib_main.c:223:10: error: implicit declaration of function 'ib_device_get_by_netdev'; did you mean 'ib_device_try_get'? [-Werror=implicit-function-declaration]
     ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
             ^~~~~~~~~~~~~~~~~~~~~~~
             ib_device_try_get
>> drivers/infiniband/hw/usnic/usnic_ib_main.c:223:8: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
           ^
   drivers/infiniband/hw/usnic/usnic_ib_main.c: In function 'usnic_ib_inetaddr_event':
   drivers/infiniband/hw/usnic/usnic_ib_main.c:286:8: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
     ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
           ^
   cc1: some warnings being treated as errors

vim +223 drivers/infiniband/hw/usnic/usnic_ib_main.c

   214	
   215	static int usnic_ib_netdevice_event(struct notifier_block *notifier,
   216						unsigned long event, void *ptr)
   217	{
   218		struct usnic_ib_dev *us_ibdev;
   219		struct ib_device *ibdev;
   220	
   221		struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
   222	
 > 223		ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
   224		if (!ibdev)
   225			goto exit;
   226		else
   227			us_ibdev = container_of(ibdev, struct usnic_ib_dev, ib_dev);
   228		usnic_ib_handle_usdev_event(us_ibdev, event);
   229	exit:
   230		return NOTIFY_DONE;
   231	}
   232	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 9529a08..53d6c12 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -216,18 +216,17 @@  static int usnic_ib_netdevice_event(struct notifier_block *notifier,
 					unsigned long event, void *ptr)
 {
 	struct usnic_ib_dev *us_ibdev;
+	struct ib_device *ibdev;
 
 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
 
-	mutex_lock(&usnic_ib_ibdev_list_lock);
-	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
-		if (us_ibdev->netdev == netdev) {
-			usnic_ib_handle_usdev_event(us_ibdev, event);
-			break;
-		}
-	}
-	mutex_unlock(&usnic_ib_ibdev_list_lock);
-
+	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
+	if (!ibdev)
+		goto exit;
+	else
+		us_ibdev = container_of(ibdev, struct usnic_ib_dev, ib_dev);
+	usnic_ib_handle_usdev_event(us_ibdev, event);
+exit:
 	return NOTIFY_DONE;
 }
 
@@ -282,16 +281,15 @@  static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
 	struct usnic_ib_dev *us_ibdev;
 	struct in_ifaddr *ifa = ptr;
 	struct net_device *netdev = ifa->ifa_dev->dev;
+	struct ib_device *ibdev;
 
-	mutex_lock(&usnic_ib_ibdev_list_lock);
-	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
-		if (us_ibdev->netdev == netdev) {
-			usnic_ib_handle_inet_event(us_ibdev, event, ptr);
-			break;
-		}
-	}
-	mutex_unlock(&usnic_ib_ibdev_list_lock);
-
+	ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_USNIC);
+	if (!ibdev)
+		goto exit;
+	else
+		us_ibdev = container_of(ibdev, struct usnic_ib_dev, ib_dev);
+	usnic_ib_handle_inet_event(us_ibdev, event, ptr);
+exit:
 	return NOTIFY_DONE;
 }
 static struct notifier_block usnic_ib_inetaddr_notifier = {