diff mbox series

[for-next,v2,2/3] RDMA/bnxt_re: Refactor device add/remove functionalities

Message ID 1582541395-19409-3-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Superseded
Headers show
Series RDMA/bnxt_re driver update | expand

Commit Message

Selvin Xavier Feb. 24, 2020, 10:49 a.m. UTC
- bnxt_re_ib_reg() handles two main functionalities - initializing
   the device and registering with the IB stack.  Split it into 2
   functions i.e. bnxt_re_dev_init() and bnxt_re_ib_init()  to account
   for the same thereby improve modularity. Do the same for
   bnxt_re_ib_unreg()i.e. split into two functions - bnxt_re_dev_uninit()
   and  bnxt_re_ib_uninit().
 - Simplify the code by combining the different steps to add and
   remove the device into two functions.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 135 ++++++++++++++++++++---------------
 1 file changed, 78 insertions(+), 57 deletions(-)

Comments

Jason Gunthorpe Feb. 24, 2020, 1:53 p.m. UTC | #1
>  static void bnxt_re_stop_irq(void *handle)
> @@ -1317,7 +1320,37 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
>  		le16_to_cpu(resp.hwrm_intf_patch);
>  }
>  
> -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
> +static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev)
> +{
> +	/* Cleanup ib dev */
> +	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
> +		ib_unregister_device(&rdev->ibdev);
> +		clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
> +	}

The reason ib_unregister_device_queued exists is because you can't
call unregistration while holding RTNL.

>  	case NETDEV_UNREGISTER:
> @@ -1704,9 +1727,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
>  		 */
>  		if (atomic_read(&rdev->sched_count) > 0)
>  			goto exit;
> -		bnxt_re_ib_unreg(rdev);
> -		bnxt_re_remove_one(rdev);
> -		bnxt_re_dev_unreg(rdev);
> +		bnxt_re_ib_uninit(rdev);
> +		bnxt_re_remove_device(rdev);
>  		break;

ie here.

This *must* simply be a call to ib_unregister_device_queued() and all
this other stuff has to go into the dealloc.

As written this is all kinds of deadlocking and racy

Jason
Selvin Xavier Feb. 25, 2020, 4:22 a.m. UTC | #2
On Mon, Feb 24, 2020 at 7:24 PM Jason Gunthorpe <jgg@mellanox.com> wrote:
>
> >  static void bnxt_re_stop_irq(void *handle)
> > @@ -1317,7 +1320,37 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
> >               le16_to_cpu(resp.hwrm_intf_patch);
> >  }
> >
> > -static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
> > +static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev)
> > +{
> > +     /* Cleanup ib dev */
> > +     if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
> > +             ib_unregister_device(&rdev->ibdev);
> > +             clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
> > +     }
>
> The reason ib_unregister_device_queued exists is because you can't
> call unregistration while holding RTNL.
>
> >       case NETDEV_UNREGISTER:
> > @@ -1704,9 +1727,8 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
> >                */
> >               if (atomic_read(&rdev->sched_count) > 0)
> >                       goto exit;
> > -             bnxt_re_ib_unreg(rdev);
> > -             bnxt_re_remove_one(rdev);
> > -             bnxt_re_dev_unreg(rdev);
> > +             bnxt_re_ib_uninit(rdev);
> > +             bnxt_re_remove_device(rdev);
> >               break;
>
> ie here.
>
> This *must* simply be a call to ib_unregister_device_queued() and all
> this other stuff has to go into the dealloc.
>
> As written this is all kinds of deadlocking and racy
>

This patch (patch 2 of this series) was to refactor the existing code and
group the reg/unreg operations as bnxt_re_add_device/bnxt_re_remove_device.  The
third patch in this series is introducing the dealloc_driver hook and
changing the code as
you suggested by calling ib_unregister_device_queued/ ib_unregister_driver.
I didn't want to squash these two patches together.

-Selvin
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 1402130..3a548b1 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -78,7 +78,8 @@  static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
 /* Mutex to protect the list of bnxt_re devices added */
 static DEFINE_MUTEX(bnxt_re_dev_lock);
 static struct workqueue_struct *bnxt_re_wq;
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev);
+static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev);
 
 static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
 {
@@ -237,7 +238,9 @@  static void bnxt_re_shutdown(void *p)
 	if (!rdev)
 		return;
 
-	bnxt_re_ib_unreg(rdev);
+	bnxt_re_ib_uninit(rdev);
+	ASSERT_RTNL();
+	bnxt_re_remove_device(rdev);
 }
 
 static void bnxt_re_stop_irq(void *handle)
@@ -1317,7 +1320,37 @@  static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
 		le16_to_cpu(resp.hwrm_intf_patch);
 }
 
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
+static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev)
+{
+	/* Cleanup ib dev */
+	if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
+		ib_unregister_device(&rdev->ibdev);
+		clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+	}
+}
+
+int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
+{
+	int rc = 0;
+
+	/* Register ib dev */
+	rc = bnxt_re_register_ib(rdev);
+	if (rc) {
+		pr_err("Failed to register with IB: %#x\n", rc);
+		return rc;
+	}
+	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+	dev_info(rdev_to_dev(rdev), "Device registered successfully");
+	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
+			 &rdev->active_width);
+	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
+	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
+	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
+
+	return rc;
+}
+
+static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
 {
 	u8 type;
 	int rc;
@@ -1378,20 +1411,15 @@  static void bnxt_re_worker(struct work_struct *work)
 	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
 }
 
-static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
 {
 	struct bnxt_qplib_creq_ctx *creq;
 	struct bnxt_re_ring_attr rattr;
 	u32 db_offt;
-	bool locked;
 	int vid;
 	u8 type;
 	int rc;
 
-	/* Acquire rtnl lock through out this function */
-	rtnl_lock();
-	locked = true;
-
 	/* Registered a new RoCE device instance to netdev */
 	memset(&rattr, 0, sizeof(rattr));
 	rc = bnxt_re_register_netdev(rdev);
@@ -1524,30 +1552,10 @@  static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
 	}
 
-	rtnl_unlock();
-	locked = false;
-
-	/* Register ib dev */
-	rc = bnxt_re_register_ib(rdev);
-	if (rc) {
-		ibdev_err(&rdev->ibdev,
-			  "Failed to register with IB: %#x\n", rc);
-		goto fail;
-	}
-	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
-	ibdev_info(&rdev->ibdev, "Device registered successfully");
-	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
-			 &rdev->active_width);
-	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
-	bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
-
 	return 0;
 fail:
-	if (!locked)
-		rtnl_lock();
-	bnxt_re_ib_unreg(rdev);
-	rtnl_unlock();
 
+	bnxt_re_dev_uninit(rdev);
 	return rc;
 }
 
@@ -1588,9 +1596,35 @@  static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
 	return rc;
 }
 
-static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev)
 {
+	bnxt_re_dev_uninit(rdev);
 	pci_dev_put(rdev->en_dev->pdev);
+	bnxt_re_dev_unreg(rdev);
+}
+
+static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
+			      struct net_device *netdev)
+{
+	int rc;
+
+	rc = bnxt_re_dev_reg(rdev, netdev);
+	if (rc == -ENODEV)
+		return rc;
+	if (rc) {
+		pr_err("Failed to register with the device %s: %#x\n",
+		       netdev->name, rc);
+		return rc;
+	}
+
+	pci_dev_get((*rdev)->en_dev->pdev);
+	rc = bnxt_re_dev_init(*rdev);
+	if (rc) {
+		pci_dev_put((*rdev)->en_dev->pdev);
+		bnxt_re_dev_unreg(*rdev);
+	}
+
+	return rc;
 }
 
 /* Handle all deferred netevents tasks */
@@ -1605,16 +1639,17 @@  static void bnxt_re_task(struct work_struct *work)
 
 	if (re_work->event != NETDEV_REGISTER &&
 	    !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
-		return;
+		goto done;
 
 	switch (re_work->event) {
 	case NETDEV_REGISTER:
-		rc = bnxt_re_ib_reg(rdev);
+		rc = bnxt_re_ib_init(rdev);
 		if (rc) {
 			ibdev_err(&rdev->ibdev,
 				  "Failed to register with IB: %#x", rc);
-			bnxt_re_remove_one(rdev);
-			bnxt_re_dev_unreg(rdev);
+			rtnl_lock();
+			bnxt_re_remove_device(rdev);
+			rtnl_unlock();
 			goto exit;
 		}
 		break;
@@ -1637,17 +1672,13 @@  static void bnxt_re_task(struct work_struct *work)
 	default:
 		break;
 	}
+done:
 	smp_mb__before_atomic();
 	atomic_dec(&rdev->sched_count);
 exit:
 	kfree(re_work);
 }
 
-static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
-{
-	pci_dev_get(rdev->en_dev->pdev);
-}
-
 /*
  * "Notifier chain callback can be invoked for the same chain from
  * different CPUs at the same time".
@@ -1685,17 +1716,9 @@  static int bnxt_re_netdev_event(struct notifier_block *notifier,
 	case NETDEV_REGISTER:
 		if (rdev)
 			break;
-		rc = bnxt_re_dev_reg(&rdev, real_dev);
-		if (rc == -ENODEV)
-			break;
-		if (rc) {
-			ibdev_err(&rdev->ibdev,
-				  "Failed to register with the device %s: %#x\n",
-				  real_dev->name, rc);
-			break;
-		}
-		bnxt_re_init_one(rdev);
-		sch_work = true;
+		rc = bnxt_re_add_device(&rdev, real_dev);
+		if (!rc)
+			sch_work = true;
 		break;
 
 	case NETDEV_UNREGISTER:
@@ -1704,9 +1727,8 @@  static int bnxt_re_netdev_event(struct notifier_block *notifier,
 		 */
 		if (atomic_read(&rdev->sched_count) > 0)
 			goto exit;
-		bnxt_re_ib_unreg(rdev);
-		bnxt_re_remove_one(rdev);
-		bnxt_re_dev_unreg(rdev);
+		bnxt_re_ib_uninit(rdev);
+		bnxt_re_remove_device(rdev);
 		break;
 
 	default:
@@ -1783,12 +1805,11 @@  static void __exit bnxt_re_mod_exit(void)
 		 */
 		flush_workqueue(bnxt_re_wq);
 		bnxt_re_dev_stop(rdev);
+		bnxt_re_ib_uninit(rdev);
 		/* Acquire the rtnl_lock as the L2 resources are freed here */
 		rtnl_lock();
-		bnxt_re_ib_unreg(rdev);
+		bnxt_re_remove_device(rdev);
 		rtnl_unlock();
-		bnxt_re_remove_one(rdev);
-		bnxt_re_dev_unreg(rdev);
 	}
 	unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
 	if (bnxt_re_wq)