diff mbox series

[rdma-next,v5,2/2] RDMA/mana_ib: Handle net event for pointing to the current netdev

Message ID 1741289079-18744-2-git-send-email-longli@linuxonhyperv.com (mailing list archive)
State New
Headers show
Series [rdma-next,v5,1/2] net: mana: Change the function signature of mana_get_primary_netdev_rcu | expand

Commit Message

Long Li March 6, 2025, 7:24 p.m. UTC
From: Long Li <longli@microsoft.com>

When running under Hyper-V, the master device to the RDMA device is always
bonded to this RDMA device. This is not user-configurable.

The master device can be unbind/bind from the kernel. During those events,
the RDMA device should set to the current netdev to reflect the change of
master device from those events.

Signed-off-by: Long Li <longli@microsoft.com>
---
Changes
v2: Add missing error handling when register_netdevice_notifier() fails.
v3: Change mana_get_primary_netdev() to return with netdev refcount held.
v4: use netdev_put().
v5: use netdevice_tracker for netdev_hold()/netdev_put().

 drivers/infiniband/hw/mana/device.c  | 47 ++++++++++++++++++++++++++--
 drivers/infiniband/hw/mana/mana_ib.h |  1 +
 2 files changed, 46 insertions(+), 2 deletions(-)

Comments

Jason Gunthorpe March 6, 2025, 7:53 p.m. UTC | #1
On Thu, Mar 06, 2025 at 11:24:39AM -0800, longli@linuxonhyperv.com wrote:
> +	switch (event) {
> +	case NETDEV_CHANGEUPPER:
> +		ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
> +		/*
> +		 * RDMA core will setup GID based on updated netdev.
> +		 * It's not possible to race with the core as rtnl lock is being
> +		 * held.
> +		 */
> +		ib_device_set_netdev(&dev->ib_dev, ndev, 1);
> +
> +		/* mana_get_primary_netdev() returns ndev with refcount held */
> +		netdev_put(ndev, &dev->dev_tracker);

? What is the point of a tracker in dev if it never lasts outside this
scope?

ib_device_set_netdev() already has a tracker built into it.

Jason
Long Li March 6, 2025, 8:01 p.m. UTC | #2
> Subject: [EXTERNAL] Re: [patch rdma-next v5 2/2] RDMA/mana_ib: Handle net
> event for pointing to the current netdev
> 
> On Thu, Mar 06, 2025 at 11:24:39AM -0800, longli@linuxonhyperv.com wrote:
> > +	switch (event) {
> > +	case NETDEV_CHANGEUPPER:
> > +		ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
> > +		/*
> > +		 * RDMA core will setup GID based on updated netdev.
> > +		 * It's not possible to race with the core as rtnl lock is being
> > +		 * held.
> > +		 */
> > +		ib_device_set_netdev(&dev->ib_dev, ndev, 1);
> > +
> > +		/* mana_get_primary_netdev() returns ndev with refcount held
> */
> > +		netdev_put(ndev, &dev->dev_tracker);
> 
> ? What is the point of a tracker in dev if it never lasts outside this scope?
> 
> ib_device_set_netdev() already has a tracker built into it.
> 
> Jason

I was asked to use a tracker for netdev_hold()/netdev_put(). But this code (and the code in mana_ib_probe() of the 1st patch) is simple enough that everything is done in one scope.

Jakub, do you think it's okay to use NULL as the tracker in both patches?

Long
Long Li March 10, 2025, 9:46 p.m. UTC | #3
> Subject: RE: [EXTERNAL] Re: [patch rdma-next v5 2/2] RDMA/mana_ib:
> Handle net event for pointing to the current netdev
> 
> > Subject: [EXTERNAL] Re: [patch rdma-next v5 2/2] RDMA/mana_ib: Handle
> > net event for pointing to the current netdev
> >
> > On Thu, Mar 06, 2025 at 11:24:39AM -0800, longli@linuxonhyperv.com
> wrote:
> > > +	switch (event) {
> > > +	case NETDEV_CHANGEUPPER:
> > > +		ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
> > > +		/*
> > > +		 * RDMA core will setup GID based on updated netdev.
> > > +		 * It's not possible to race with the core as rtnl lock is being
> > > +		 * held.
> > > +		 */
> > > +		ib_device_set_netdev(&dev->ib_dev, ndev, 1);
> > > +
> > > +		/* mana_get_primary_netdev() returns ndev with refcount
> held
> > */
> > > +		netdev_put(ndev, &dev->dev_tracker);
> >
> > ? What is the point of a tracker in dev if it never lasts outside this scope?
> >
> > ib_device_set_netdev() already has a tracker built into it.
> >
> > Jason
> 
> I was asked to use a tracker for netdev_hold()/netdev_put(). But this code
> (and the code in mana_ib_probe() of the 1st patch) is simple enough that
> everything is done in one scope.
> 
> Jakub, do you think it's okay to use NULL as the tracker in both patches?
> 
> Long

Hi, 

If we don't want to use a tracker, can we take the v4 version of the patch set?

Otherwise, please take v5 (this patch) if a tracker is required.

Thanks,
Long
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 363566095501..b0b866b574a0 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -51,6 +51,38 @@  static const struct ib_device_ops mana_ib_dev_ops = {
 			   ib_ind_table),
 };
 
+static int mana_ib_netdev_event(struct notifier_block *this,
+				unsigned long event, void *ptr)
+{
+	struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+	struct gdma_context *gc = dev->gdma_dev->gdma_context;
+	struct mana_context *mc = gc->mana.driver_data;
+	struct net_device *ndev;
+
+	/* Only process events from our parent device */
+	if (event_dev != mc->ports[0])
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+		/*
+		 * RDMA core will setup GID based on updated netdev.
+		 * It's not possible to race with the core as rtnl lock is being
+		 * held.
+		 */
+		ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+
+		/* mana_get_primary_netdev() returns ndev with refcount held */
+		netdev_put(ndev, &dev->dev_tracker);
+
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int mana_ib_probe(struct auxiliary_device *adev,
 			 const struct auxiliary_device_id *id)
 {
@@ -108,17 +140,25 @@  static int mana_ib_probe(struct auxiliary_device *adev,
 	}
 	dev->gdma_dev = &mdev->gdma_context->mana_ib;
 
+	dev->nb.notifier_call = mana_ib_netdev_event;
+	ret = register_netdevice_notifier(&dev->nb);
+	if (ret) {
+		ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+			  ret);
+		goto deregister_device;
+	}
+
 	ret = mana_ib_gd_query_adapter_caps(dev);
 	if (ret) {
 		ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
 			  ret);
-		goto deregister_device;
+		goto deregister_net_notifier;
 	}
 
 	ret = mana_ib_create_eqs(dev);
 	if (ret) {
 		ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
-		goto deregister_device;
+		goto deregister_net_notifier;
 	}
 
 	ret = mana_ib_gd_create_rnic_adapter(dev);
@@ -147,6 +187,8 @@  static int mana_ib_probe(struct auxiliary_device *adev,
 	mana_ib_gd_destroy_rnic_adapter(dev);
 destroy_eqs:
 	mana_ib_destroy_eqs(dev);
+deregister_net_notifier:
+	unregister_netdevice_notifier(&dev->nb);
 deregister_device:
 	mana_gd_deregister_device(dev->gdma_dev);
 free_ib_device:
@@ -162,6 +204,7 @@  static void mana_ib_remove(struct auxiliary_device *adev)
 	xa_destroy(&dev->qp_table_wq);
 	mana_ib_gd_destroy_rnic_adapter(dev);
 	mana_ib_destroy_eqs(dev);
+	unregister_netdevice_notifier(&dev->nb);
 	mana_gd_deregister_device(dev->gdma_dev);
 	ib_dealloc_device(&dev->ib_dev);
 }
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 2638688f2505..bb9c6b1af24e 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -65,6 +65,7 @@  struct mana_ib_dev {
 	struct xarray qp_table_wq;
 	struct mana_ib_adapter_caps adapter_caps;
 	netdevice_tracker dev_tracker;
+	struct notifier_block nb;
 };
 
 struct mana_ib_wq {