diff mbox series

[rdma-next] bnxt_re: Rely on Kconfig to keep module dependency

Message ID 20210324142524.1135319-1-leon@kernel.org (mailing list archive)
State Superseded
Headers show
Series [rdma-next] bnxt_re: Rely on Kconfig to keep module dependency | expand

Commit Message

Leon Romanovsky March 24, 2021, 2:25 p.m. UTC
From: Leon Romanovsky <leonro@nvidia.com>

Instead of manually messing with the parent driver's module reference
counting, rely on the "depends on" keyword to ensure that the proper
probe/remove chain is performed.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/bnxt_re/Kconfig |  4 +---
 drivers/infiniband/hw/bnxt_re/main.c  | 20 +++++---------------
 2 files changed, 6 insertions(+), 18 deletions(-)

Comments

Jason Gunthorpe March 24, 2021, 3:07 p.m. UTC | #1
On Wed, Mar 24, 2021 at 04:25:24PM +0200, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> Instead of manually messing with parent driver module reference
> counting, rely on "depends on" keyword to ensure that proper
> probe/remove chain is performed.

?? kconfig doesn't impact module ordering.

To have a proper remove chain there should be a symbol reference from
bnxt_re to whatever the other module is

> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
>  drivers/infiniband/hw/bnxt_re/Kconfig |  4 +---
>  drivers/infiniband/hw/bnxt_re/main.c  | 20 +++++---------------
>  2 files changed, 6 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
> index 0feac5132ce1..b4779a6cd565 100644
> +++ b/drivers/infiniband/hw/bnxt_re/Kconfig
> @@ -2,9 +2,7 @@
>  config INFINIBAND_BNXT_RE
>  	tristate "Broadcom Netxtreme HCA support"
>  	depends on 64BIT
> -	depends on ETHERNET && NETDEVICES && PCI && INET && DCB
> -	select NET_VENDOR_BROADCOM
> -	select BNXT
> +	depends on ETHERNET && NETDEVICES && PCI && INET && DCB && BNXT

Though this is correct: BNXT is a 'tristate', so it should be
referenced with 'depends on', not 'select'.

> diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> index fdb8c2478258..a81adb07e5d9 100644
> +++ b/drivers/infiniband/hw/bnxt_re/main.c
> @@ -561,13 +561,6 @@ static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
>  	return container_of(ibdev, struct bnxt_re_dev, ibdev);
>  }
>  
> -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> -				struct bnxt_en_dev *en_dev)
> -{
> -	dev_put(netdev);
> -	module_put(en_dev->pdev->driver->driver.owner);
> -}

And you are right to be wondering WTF is this

Jason
Leon Romanovsky March 24, 2021, 3:16 p.m. UTC | #2
On Wed, Mar 24, 2021 at 12:07:59PM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 24, 2021 at 04:25:24PM +0200, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> > 
> > Instead of manually messing with parent driver module reference
> > counting, rely on "depends on" keyword to ensure that proper
> > probe/remove chain is performed.
> 
> ?? kconfig doesn't impact module ordering.

Yeah, I was fast with the typing.

> 
> To have a proper remove chain there should be a symbol reference from
> bnxt_re to whatever the other module is

Right, they have probe_ulp() calls or something.

> 
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> >  drivers/infiniband/hw/bnxt_re/Kconfig |  4 +---
> >  drivers/infiniband/hw/bnxt_re/main.c  | 20 +++++---------------
> >  2 files changed, 6 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
> > index 0feac5132ce1..b4779a6cd565 100644
> > +++ b/drivers/infiniband/hw/bnxt_re/Kconfig
> > @@ -2,9 +2,7 @@
> >  config INFINIBAND_BNXT_RE
> >  	tristate "Broadcom Netxtreme HCA support"
> >  	depends on 64BIT
> > -	depends on ETHERNET && NETDEVICES && PCI && INET && DCB
> > -	select NET_VENDOR_BROADCOM
> > -	select BNXT
> > +	depends on ETHERNET && NETDEVICES && PCI && INET && DCB && BNXT
> 
> Though this is correct, BNXT is a 'tristate' so it should be
> referenced with depends on select.
> 
> > diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> > index fdb8c2478258..a81adb07e5d9 100644
> > +++ b/drivers/infiniband/hw/bnxt_re/main.c
> > @@ -561,13 +561,6 @@ static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
> >  	return container_of(ibdev, struct bnxt_re_dev, ibdev);
> >  }
> >  
> > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > -				struct bnxt_en_dev *en_dev)
> > -{
> > -	dev_put(netdev);
> > -	module_put(en_dev->pdev->driver->driver.owner);
> > -}
> 
> And you are right to be wondering WTF is this
> 
> Jason
Devesh Sharma March 24, 2021, 4:30 p.m. UTC | #3
On Wed, Mar 24, 2021 at 8:46 PM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Wed, Mar 24, 2021 at 12:07:59PM -0300, Jason Gunthorpe wrote:
> > On Wed, Mar 24, 2021 at 04:25:24PM +0200, Leon Romanovsky wrote:
> > > From: Leon Romanovsky <leonro@nvidia.com>
> > >
> > > Instead of manually messing with parent driver module reference
> > > counting, rely on "depends on" keyword to ensure that proper
> > > probe/remove chain is performed.
> >
> > ?? kconfig doesn't impact module ordering.
>
> Yeah, I was fast with the typing.
>
> >
> > To have a proper remove chain there should be a symbol reference from
> > bnxt_re to whatever the other module is
>
> Right, they have probe_ulp() calls or something.
>
> >
> > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > >  drivers/infiniband/hw/bnxt_re/Kconfig |  4 +---
> > >  drivers/infiniband/hw/bnxt_re/main.c  | 20 +++++---------------
> > >  2 files changed, 6 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
> > > index 0feac5132ce1..b4779a6cd565 100644
> > > +++ b/drivers/infiniband/hw/bnxt_re/Kconfig
> > > @@ -2,9 +2,7 @@
> > >  config INFINIBAND_BNXT_RE
> > >     tristate "Broadcom Netxtreme HCA support"
> > >     depends on 64BIT
> > > -   depends on ETHERNET && NETDEVICES && PCI && INET && DCB
> > > -   select NET_VENDOR_BROADCOM
> > > -   select BNXT
> > > +   depends on ETHERNET && NETDEVICES && PCI && INET && DCB && BNXT
> >
> > Though this is correct, BNXT is a 'tristate' so it should be
> > referenced with depends on select.
> >
> > > diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
> > > index fdb8c2478258..a81adb07e5d9 100644
> > > +++ b/drivers/infiniband/hw/bnxt_re/main.c
> > > @@ -561,13 +561,6 @@ static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
> > >     return container_of(ibdev, struct bnxt_re_dev, ibdev);
> > >  }
> > >
> > > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > > -                           struct bnxt_en_dev *en_dev)
> > > -{
> > > -   dev_put(netdev);
> > > -   module_put(en_dev->pdev->driver->driver.owner);
> > > -}
> >
> > And you are right to be wondering WTF is this
> >
> > Jason

Hi Leon and Jason,

Still trying to understand, but what's the big idea here? Maybe I can help.
Jason Gunthorpe March 24, 2021, 4:56 p.m. UTC | #4
On Wed, Mar 24, 2021 at 10:00:05PM +0530, Devesh Sharma wrote:

> > > > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > > > -                           struct bnxt_en_dev *en_dev)
> > > > -{
> > > > -   dev_put(netdev);
> > > > -   module_put(en_dev->pdev->driver->driver.owner);
> > > > -}
> > >
> > > And you are right to be wondering WTF is this
> 
> Still trying to understand but what's the big idea here may be I can help.

A driver should not contain module_put() calls like the one above.

It should not be accessing ->driver without holding the device_lock().

Basically it is all nonsense code; Leon suggests deleting it, and he
is probably right.

Can you explain what it thinks it is doing?

Jason
Devesh Sharma March 24, 2021, 5:24 p.m. UTC | #5
On Wed, Mar 24, 2021 at 10:26 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Wed, Mar 24, 2021 at 10:00:05PM +0530, Devesh Sharma wrote:
>
> > > > > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > > > > -                           struct bnxt_en_dev *en_dev)
> > > > > -{
> > > > > -   dev_put(netdev);
> > > > > -   module_put(en_dev->pdev->driver->driver.owner);
> > > > > -}
> > > >
> > > > And you are right to be wondering WTF is this
> >
> > Still trying to understand but what's the big idea here may be I can help.
>
> A driver should not have module put things like the above
>
> It should not be accessing ->driver without holding the device_lock()
>
> Basically it is all nonsense coding, Leon suggests to delete it and he
> is probably right.
>
> Can you explain what it thinks it is doing?
That F'ed up code is trying to prevent a situation where someone
tries to remove the bnxt_en driver while the bnxt_re driver is using it.
All because the bnxt_re driver is at the mercy of the bnxt_en driver and
there is no symbol dependency. Do you suggest anything to prevent that
unload of bnxt_en other than doing this kind of hack? I did not follow how
"depends on" would prevent things.
>
> Jason
Jason Gunthorpe March 24, 2021, 5:35 p.m. UTC | #6
On Wed, Mar 24, 2021 at 10:54:58PM +0530, Devesh Sharma wrote:
> On Wed, Mar 24, 2021 at 10:26 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Wed, Mar 24, 2021 at 10:00:05PM +0530, Devesh Sharma wrote:
> >
> > > > > > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > > > > > -                           struct bnxt_en_dev *en_dev)
> > > > > > -{
> > > > > > -   dev_put(netdev);
> > > > > > -   module_put(en_dev->pdev->driver->driver.owner);
> > > > > > -}
> > > > >
> > > > > And you are right to be wondering WTF is this
> > >
> > > Still trying to understand but what's the big idea here may be I can help.
> >
> > A driver should not have module put things like the above
> >
> > It should not be accessing ->driver without holding the device_lock()
> >
> > Basically it is all nonsense coding, Leon suggests to delete it and he
> > is probably right.
> >
> > Can you explain what it thinks it is doing?
> That F'ed up  code is trying to prevent a situation where someone
> tries to remove the bnxt_en driver while bnxt_re driver is using it.
> All because bnxt_re driver is at the mercy of bnxt_en drive and there
> is not symbole dependence, Do you suggest anything to prevent that
> unload of bnxt_en other than doing this jargon.

Well, the module put says nothing about the validity of the 'struct
bnxt' and the related data it extracted from the netdev - you should have a
mechanism that prevents that from going invalid, which in turn will
ensure the function pointers you want to touch are still valid
too (as the struct containing the function pointers must become invalid
before the module unloads).

Probably the netdev refcount does that already but I always forget the
exact point during unregister that it waits on that...

As far as strict module dependencies go, replace the pointless
brp->ulp_probe function pointer with an actual call to
bnxt_ulp_probe() and you get the same effect as the module_get.

Jason
Leon Romanovsky March 25, 2021, 8:40 a.m. UTC | #7
On Wed, Mar 24, 2021 at 02:35:56PM -0300, Jason Gunthorpe wrote:
> On Wed, Mar 24, 2021 at 10:54:58PM +0530, Devesh Sharma wrote:
> > On Wed, Mar 24, 2021 at 10:26 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > >
> > > On Wed, Mar 24, 2021 at 10:00:05PM +0530, Devesh Sharma wrote:
> > >
> > > > > > > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > > > > > > -                           struct bnxt_en_dev *en_dev)
> > > > > > > -{
> > > > > > > -   dev_put(netdev);
> > > > > > > -   module_put(en_dev->pdev->driver->driver.owner);
> > > > > > > -}
> > > > > >
> > > > > > And you are right to be wondering WTF is this
> > > >
> > > > Still trying to understand but what's the big idea here may be I can help.
> > >
> > > A driver should not have module put things like the above
> > >
> > > It should not be accessing ->driver without holding the device_lock()
> > >
> > > Basically it is all nonsense coding, Leon suggests to delete it and he
> > > is probably right.
> > >
> > > Can you explain what it thinks it is doing?
> > That F'ed up  code is trying to prevent a situation where someone
> > tries to remove the bnxt_en driver while bnxt_re driver is using it.
> > All because bnxt_re driver is at the mercy of bnxt_en drive and there
> > is not symbole dependence, Do you suggest anything to prevent that
> > unload of bnxt_en other than doing this jargon.
> 
> Well, the module put says nothing about the validity of the 'struct
> bnxt' and related it extracted from the netdev - you should have a
> mechanism that prevents that from going invalid which in turn will
> ensure the function pointers you want to touch are still valid
> too. (as the struct containing function pointers must become invalid
> before the module unloads)
> 
> Probably the netdev refcount does that already but I always forget the
> exact point during unregister that it waits on that...
> 
> As far as strict module dependencies go, replace the pointless
> brp->ulp_probe function pointer with an actual call to
> bnxt_ulp_probe() and you get the same effect as the module_get.

Yeah, I'll update it.

Thanks

> 
> Jason
Devesh Sharma March 26, 2021, 6:09 a.m. UTC | #8
On Thu, Mar 25, 2021 at 2:10 PM Leon Romanovsky <leon@kernel.org> wrote:
>
> On Wed, Mar 24, 2021 at 02:35:56PM -0300, Jason Gunthorpe wrote:
> > On Wed, Mar 24, 2021 at 10:54:58PM +0530, Devesh Sharma wrote:
> > > On Wed, Mar 24, 2021 at 10:26 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > > >
> > > > On Wed, Mar 24, 2021 at 10:00:05PM +0530, Devesh Sharma wrote:
> > > >
> > > > > > > > -static void bnxt_re_dev_unprobe(struct net_device *netdev,
> > > > > > > > -                           struct bnxt_en_dev *en_dev)
> > > > > > > > -{
> > > > > > > > -   dev_put(netdev);
> > > > > > > > -   module_put(en_dev->pdev->driver->driver.owner);
> > > > > > > > -}
> > > > > > >
> > > > > > > And you are right to be wondering WTF is this
> > > > >
> > > > > Still trying to understand but what's the big idea here may be I can help.
> > > >
> > > > A driver should not have module put things like the above
> > > >
> > > > It should not be accessing ->driver without holding the device_lock()
> > > >
> > > > Basically it is all nonsense coding, Leon suggests to delete it and he
> > > > is probably right.
> > > >
> > > > Can you explain what it thinks it is doing?
> > > That F'ed up  code is trying to prevent a situation where someone
> > > tries to remove the bnxt_en driver while bnxt_re driver is using it.
> > > All because bnxt_re driver is at the mercy of bnxt_en drive and there
> > > is not symbole dependence, Do you suggest anything to prevent that
> > > unload of bnxt_en other than doing this jargon.
> >
> > Well, the module put says nothing about the validity of the 'struct
> > bnxt' and related it extracted from the netdev - you should have a
> > mechanism that prevents that from going invalid which in turn will
> > ensure the function pointers you want to touch are still valid
> > too. (as the struct containing function pointers must become invalid
> > before the module unloads)
> >
> > Probably the netdev refcount does that already but I always forget the
> > exact point during unregister that it waits on that...
> >
> > As far as strict module dependencies go, replace the pointless
> > brp->ulp_probe function pointer with an actual call to
> > bnxt_ulp_probe() and you get the same effect as the module_get.
>
> Yeah, I'll update it.
>
> Thanks
>
Yes, making it an exported symbol will help. It needs radical changes
in the driver load/unload path. Let me also take this feedback to
my internal team.
> >
> > Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index 0feac5132ce1..b4779a6cd565 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -2,9 +2,7 @@ 
 config INFINIBAND_BNXT_RE
 	tristate "Broadcom Netxtreme HCA support"
 	depends on 64BIT
-	depends on ETHERNET && NETDEVICES && PCI && INET && DCB
-	select NET_VENDOR_BROADCOM
-	select BNXT
+	depends on ETHERNET && NETDEVICES && PCI && INET && DCB && BNXT
 	help
 	  This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
 	  RoCE HCAs.  To compile this driver as a module, choose M here:
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index fdb8c2478258..a81adb07e5d9 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -561,13 +561,6 @@  static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
 	return container_of(ibdev, struct bnxt_re_dev, ibdev);
 }
 
-static void bnxt_re_dev_unprobe(struct net_device *netdev,
-				struct bnxt_en_dev *en_dev)
-{
-	dev_put(netdev);
-	module_put(en_dev->pdev->driver->driver.owner);
-}
-
 static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 {
 	struct bnxt *bp = netdev_priv(netdev);
@@ -593,10 +586,6 @@  static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 		return ERR_PTR(-ENODEV);
 	}
 
-	/* Bump net device reference count */
-	if (!try_module_get(pdev->driver->driver.owner))
-		return ERR_PTR(-ENODEV);
-
 	dev_hold(netdev);
 
 	return en_dev;
@@ -1523,13 +1512,14 @@  static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
 
 static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
 {
-	struct bnxt_en_dev *en_dev = rdev->en_dev;
 	struct net_device *netdev = rdev->netdev;
 
 	bnxt_re_dev_remove(rdev);
 
-	if (netdev)
-		bnxt_re_dev_unprobe(netdev, en_dev);
+	if (!netdev)
+		return;
+
+	dev_put(netdev);
 }
 
 static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
@@ -1551,7 +1541,7 @@  static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
 	*rdev = bnxt_re_dev_add(netdev, en_dev);
 	if (!*rdev) {
 		rc = -ENOMEM;
-		bnxt_re_dev_unprobe(netdev, en_dev);
+		dev_put(netdev);
 		goto exit;
 	}
 exit: