diff mbox series

[v3,04/20] i40e: Register a virtbus device to provide RDMA

Message ID 20191209224935.1780117-5-jeffrey.t.kirsher@intel.com (mailing list archive)
State Changes Requested
Headers show
Series Intel Wired LAN Driver Updates 2019-12-09 | expand

Commit Message

Kirsher, Jeffrey T Dec. 9, 2019, 10:49 p.m. UTC
From: Shiraz Saleem <shiraz.saleem@intel.com>

Register client virtbus device on the virtbus for the RDMA
virtbus driver (irdma) to bind to. It allows to realize a
single RDMA driver capable of working with multiple netdev
drivers over multi-generation Intel HW supporting RDMA.
There is also no load ordering dependencies between i40e and
irdma.

Summary of changes:
* Support to add/remove virtbus devices
* Add 2 new client ops.
	* i40e_client_device_register() which is called during RDMA
	  probe() per PF. Validate client drv OPs and schedule service
	  task to call open()
	* i40e_client_device_unregister() called during RDMA remove()
	  per PF. Call client close() and release_qvlist.
* The global register/unregister calls exported for i40iw are retained
  until i40iw is removed from the kernel.

Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/infiniband/hw/i40iw/Makefile          |   1 -
 drivers/infiniband/hw/i40iw/i40iw.h           |   2 +-
 drivers/net/ethernet/intel/Kconfig            |   1 +
 drivers/net/ethernet/intel/i40e/i40e.h        |   3 +-
 drivers/net/ethernet/intel/i40e/i40e_client.c | 109 +++++++++++++++---
 .../linux/net/intel}/i40e_client.h            |  20 +++-
 6 files changed, 112 insertions(+), 24 deletions(-)
 rename {drivers/net/ethernet/intel/i40e => include/linux/net/intel}/i40e_client.h (92%)

Comments

Greg KH Dec. 10, 2019, 3:33 p.m. UTC | #1
On Mon, Dec 09, 2019 at 02:49:19PM -0800, Jeff Kirsher wrote:
> --- a/drivers/infiniband/hw/i40iw/Makefile
> +++ b/drivers/infiniband/hw/i40iw/Makefile
> @@ -1,5 +1,4 @@
>  # SPDX-License-Identifier: GPL-2.0
> -ccflags-y :=  -I $(srctree)/drivers/net/ethernet/intel/i40e

Isn't dropping this cflags good for a nice simple separate patch to keep
the #include mess from cluttering up a patch that is trying to add
new functionality?
Greg KH Dec. 10, 2019, 3:39 p.m. UTC | #2
On Mon, Dec 09, 2019 at 02:49:19PM -0800, Jeff Kirsher wrote:
> From: Shiraz Saleem <shiraz.saleem@intel.com>
> 
> Register client virtbus device on the virtbus for the RDMA
> virtbus driver (irdma) to bind to. It allows to realize a
> single RDMA driver capable of working with multiple netdev
> drivers over multi-generation Intel HW supporting RDMA.
> There is also no load ordering dependencies between i40e and
> irdma.
> 
> Summary of changes:
> * Support to add/remove virtbus devices
> * Add 2 new client ops.
> 	* i40e_client_device_register() which is called during RDMA
> 	  probe() per PF. Validate client drv OPs and schedule service
> 	  task to call open()
> 	* i40e_client_device_unregister() called during RDMA remove()
> 	  per PF. Call client close() and release_qvlist.
> * The global register/unregister calls exported for i40iw are retained
>   until i40iw is removed from the kernel.
> 
> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> ---
>  drivers/infiniband/hw/i40iw/Makefile          |   1 -
>  drivers/infiniband/hw/i40iw/i40iw.h           |   2 +-
>  drivers/net/ethernet/intel/Kconfig            |   1 +
>  drivers/net/ethernet/intel/i40e/i40e.h        |   3 +-
>  drivers/net/ethernet/intel/i40e/i40e_client.c | 109 +++++++++++++++---
>  .../linux/net/intel}/i40e_client.h            |  20 +++-
>  6 files changed, 112 insertions(+), 24 deletions(-)
>  rename {drivers/net/ethernet/intel/i40e => include/linux/net/intel}/i40e_client.h (92%)
> 
> diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
> index 8942f8229945..34da9eba8a7c 100644
> --- a/drivers/infiniband/hw/i40iw/Makefile
> +++ b/drivers/infiniband/hw/i40iw/Makefile
> @@ -1,5 +1,4 @@
>  # SPDX-License-Identifier: GPL-2.0
> -ccflags-y :=  -I $(srctree)/drivers/net/ethernet/intel/i40e
>  
>  obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
>  
> diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
> index 8feec35f95a7..3197e3536d5c 100644
> --- a/drivers/infiniband/hw/i40iw/i40iw.h
> +++ b/drivers/infiniband/hw/i40iw/i40iw.h
> @@ -57,7 +57,7 @@
>  #include "i40iw_d.h"
>  #include "i40iw_hmc.h"
>  
> -#include <i40e_client.h>
> +#include <linux/net/intel/i40e_client.h>
>  #include "i40iw_type.h"
>  #include "i40iw_p.h"
>  #include <rdma/i40iw-abi.h>
> diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
> index b88328fea1d0..8595f578fbe7 100644
> --- a/drivers/net/ethernet/intel/Kconfig
> +++ b/drivers/net/ethernet/intel/Kconfig
> @@ -241,6 +241,7 @@ config I40E
>  	tristate "Intel(R) Ethernet Controller XL710 Family support"
>  	imply PTP_1588_CLOCK
>  	depends on PCI
> +	select VIRTUAL_BUS
>  	---help---
>  	  This driver supports Intel(R) Ethernet Controller XL710 Family of
>  	  devices.  For more information on how to identify your adapter, go
> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> index cb6367334ca7..4321e81d347c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> @@ -38,7 +38,7 @@
>  #include <net/xdp_sock.h>
>  #include "i40e_type.h"
>  #include "i40e_prototype.h"
> -#include "i40e_client.h"
> +#include <linux/net/intel/i40e_client.h>
>  #include <linux/avf/virtchnl.h>
>  #include "i40e_virtchnl_pf.h"
>  #include "i40e_txrx.h"
> @@ -655,6 +655,7 @@ struct i40e_pf {
>  	u16 last_sw_conf_valid_flags;
>  	/* List to keep previous DDP profiles to be rolled back in the future */
>  	struct list_head ddp_old_prof;
> +	int peer_idx;
>  };
>  
>  /**
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
> index e81530ca08d0..a3dee729719b 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_client.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
> @@ -1,12 +1,12 @@
>  // SPDX-License-Identifier: GPL-2.0
>  /* Copyright(c) 2013 - 2018 Intel Corporation. */
>  
> +#include <linux/net/intel/i40e_client.h>
>  #include <linux/list.h>
>  #include <linux/errno.h>
>  
>  #include "i40e.h"
>  #include "i40e_prototype.h"
> -#include "i40e_client.h"
>  
>  static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
>  static struct i40e_client *registered_client;
> @@ -30,11 +30,17 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
>  				       bool is_vf, u32 vf_id,
>  				       u32 flag, u32 valid_flag);
>  
> +static int i40e_client_device_register(struct i40e_info *ldev);
> +
> +static void i40e_client_device_unregister(struct i40e_info *ldev);
> +
>  static struct i40e_ops i40e_lan_ops = {
>  	.virtchnl_send = i40e_client_virtchnl_send,
>  	.setup_qvlist = i40e_client_setup_qvlist,
>  	.request_reset = i40e_client_request_reset,
>  	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
> +	.client_device_register = i40e_client_device_register,
> +	.client_device_unregister = i40e_client_device_unregister,
>  };
>  
>  /**
> @@ -275,6 +281,27 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
>  	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
>  }
>  
> +static int i40e_init_client_virtdev(struct i40e_pf *pf)
> +{
> +	struct i40e_info *ldev = &pf->cinst->lan_info;
> +	struct pci_dev *pdev = pf->pdev;
> +	struct virtbus_device *vdev;
> +	int ret;
> +
> +	vdev = &ldev->vdev;
> +	vdev->name = I40E_PEER_RDMA_NAME;
> +	vdev->dev.parent = &pf->pdev->dev;

What a total and complete mess of a tangled web you just wove here.

Ok, so you pass in a single pointer, that then dereferences 3 pointers
deep to find the pointer to the virtbus_device structure, but then you
point the parent of that device, back at the original structure's
sub-pointer's device itself.

WTF?

And who owns the memory of this thing that is supposed to be
dynamically controlled by something OUTSIDE of this driver?  Who created
that thing 3 pointers deep?  What happens when you leak the memory below
(hint, you did), and who is supposed to clean it up if you need to
properly clean it up if something bad happens?

> +
> +	ret = virtbus_dev_register(vdev);
> +	if (ret) {
> +		dev_err(&pdev->dev, "Failure adding client virtbus dev %s %d\n",
> +			I40E_PEER_RDMA_NAME, ret);

Again, the core should handle this, right?

> +		return ret;

Did you just leak memory?

Yup, you did, you never actually checked the return value of this
function :(

Ugh.

I feel like the virtual bus code is getting better, but this use of the
code, um, no, not ok.

Either way, this series is NOT ready to be merged anywhere, please do
not try to rush things.

Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
requirement between this group, and the other group trying to do the
same thing?  I want to see signed-off-by from EVERYONE involved before
we are going to consider this thing.

thanks,

greg k-h
Shiraz Saleem Dec. 13, 2019, 11:08 p.m. UTC | #3
> Subject: Re: [PATCH v3 04/20] i40e: Register a virtbus device to provide RDMA
> 

[....]

> >  /**
> > @@ -275,6 +281,27 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
> >  	cdev->lan_info.msix_entries =
> > &pf->msix_entries[pf->iwarp_base_vector];
> >  }
> >
> > +static int i40e_init_client_virtdev(struct i40e_pf *pf) {
> > +	struct i40e_info *ldev = &pf->cinst->lan_info;
> > +	struct pci_dev *pdev = pf->pdev;
> > +	struct virtbus_device *vdev;
> > +	int ret;
> > +
> > +	vdev = &ldev->vdev;
> > +	vdev->name = I40E_PEER_RDMA_NAME;
> > +	vdev->dev.parent = &pf->pdev->dev;
> 
> What a total and complete mess of a tangled web you just wove here.
> 
> Ok, so you pass in a single pointer, that then dereferences 3 pointers deep to find
> the pointer to the virtbus_device structure, but then you point the parent of that
> device, back at the original structure's sub-pointer's device itself.
> 
> WTF?

OK. This is convoluted. Passing a pointer to the i40e_info object should suffice. So something like,

+static int i40e_init_client_virtdev(struct i40e_info *ldev) {
+       struct pci_dev *pdev = ldev->pcidev;
+       struct virtbus_device *vdev = &ldev->vdev;
+       int ret;
+
+       vdev->name = I40E_PEER_RDMA_NAME;
+       vdev->dev.parent = &pdev->dev;
+       ret = virtbus_dev_register(vdev);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+

> 
> And who owns the memory of this thing that is supposed to be dynamically
> controlled by something OUTSIDE of this driver?  Who created that thing 3
> pointers deep?  What happens when you leak the memory below (hint, you did),
> and who is supposed to clean it up if you need to properly clean it up if something
> bad happens?

The i40e_info object memory is tied to the PF driver.

The object hierarchy is,

i40e_pf: pointer to i40e_client_instance 
	----- i40e_client_instance: i40e_info
		----- i40e_info: virtbus_device

For each PF, there is a client_instance object allocated.
The i40e_info object is populated and the virtbus_device hanging off this object is registered.
In irdma probe(), we use the container_of macro to get to this i40e_info object from the
virtbus_device. It contains all the ops/info which RDMA driver needs from the PCI function driver.

The lifetime of the i40e_info object (and the virtbus device) is tied to the PF.
When PF goes away, virtbus_device is unregistered and the client_instance object memory
is freed.

> 
> > +
> > +	ret = virtbus_dev_register(vdev);
> > +	if (ret) {
> > +		dev_err(&pdev->dev, "Failure adding client virtbus dev %s %d\n",
> > +			I40E_PEER_RDMA_NAME, ret);
> 
> Again, the core should handle this, right?

Right. Will fix.

> 
> > +		return ret;
> 
> Did you just leak memory?

Thanks! Will fix.

> Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
> requirement between this group, and the other group trying to do the same thing?  I
> want to see signed-off-by from EVERYONE involved before we are going to
> consider this thing.
> 
We will have all parties cc'ed in the next submission. Would encourage folks to review
and hopefully we can get some consensus.
Greg KH Dec. 14, 2019, 8:37 a.m. UTC | #4
On Fri, Dec 13, 2019 at 11:08:34PM +0000, Saleem, Shiraz wrote:
> > Subject: Re: [PATCH v3 04/20] i40e: Register a virtbus device to provide RDMA
> > 
> 
> [....]
> 
> > >  /**
> > > @@ -275,6 +281,27 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
> > >  	cdev->lan_info.msix_entries =
> > > &pf->msix_entries[pf->iwarp_base_vector];
> > >  }
> > >
> > > +static int i40e_init_client_virtdev(struct i40e_pf *pf) {
> > > +	struct i40e_info *ldev = &pf->cinst->lan_info;
> > > +	struct pci_dev *pdev = pf->pdev;
> > > +	struct virtbus_device *vdev;
> > > +	int ret;
> > > +
> > > +	vdev = &ldev->vdev;
> > > +	vdev->name = I40E_PEER_RDMA_NAME;
> > > +	vdev->dev.parent = &pf->pdev->dev;
> > 
> > What a total and complete mess of a tangled web you just wove here.
> > 
> > Ok, so you pass in a single pointer, that then dereferences 3 pointers deep to find
> > the pointer to the virtbus_device structure, but then you point the parent of that
> > device, back at the original structure's sub-pointer's device itself.
> > 
> > WTF?
> 
> OK. This is convoluted. Passing a pointer to the i40e_info object should suffice. So something like,
> 
> +static int i40e_init_client_virtdev(struct i40e_info *ldev) {
> +       struct pci_dev *pdev = ldev->pcidev;
> +       struct virtbus_device *vdev = &ldev->vdev;
> +       int ret;
> +
> +       vdev->name = I40E_PEER_RDMA_NAME;
> +       vdev->dev.parent = &pdev->dev;
> +       ret = virtbus_dev_register(vdev);
> +       if (ret)
> +               return ret;
> +
> +       return 0;
> +}
> +
> 
> > 
> > And who owns the memory of this thing that is supposed to be dynamically
> > controlled by something OUTSIDE of this driver?  Who created that thing 3
> > pointers deep?  What happens when you leak the memory below (hint, you did),
> > and who is supposed to clean it up if you need to properly clean it up if something
> > bad happens?
> 
> The i40e_info object memory is tied to the PF driver.

What is a "PF"?

> The object hierarchy is,
> 
> i40e_pf: pointer to i40e_client_instance 
> 	----- i40e_client_instance: i40e_info
> 		----- i40e_info: virtbus_device

So you are 3 pointers deep to get a structure that is dynamically
controlled?  Why are those "3 pointers" not also represented in sysfs?
You have a hierarchy within the kernel that is not being represented that
way to userspace, why?

Hint, I think this is totally wrong, you need to rework this to be sane.

> For each PF, there is a client_instance object allocated.

Great, make it dynamic and in the device tree.

> The i40e_info object is populated and the virtbus_device hanging off this object is registered.

Great, make that dynamic and in the device tree.

If you think this is too much, then your whole mess here is too much and
needs to be made a lot simpler.

> In irdma probe(), we use the container_of macro to get to this i40e_info object from the
> virtbus_device. It contains all the ops/info which RDMA driver needs from the PCI function driver.

Ok, that's what you are supposed to do, but why walk back 3 levels?

> The lifetime of the i40e_info object (and the virtbus device) is tied to the PF.

Careful, these are dynamic structures, you don't get to fully "tie"
anything here.

> When PF goes away, virtbus_device is unregistered and the client_instance object memory
> is freed.

Hopefully, if no one else has a reference (hint, you never know this.)
If you are relying on this, then you are not using the driver model
correctly.

> > Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
> > requirement between this group, and the other group trying to do the same thing?  I
> > want to see signed-off-by from EVERYONE involved before we are going to
> > consider this thing.
> > 
> We will have all parties cc'ed in the next submission. Would encourage folks to review
> and hopefully we can get some consensus.

Then when you post your patches, do so to be obvious that is what you
are asking for, don't try to do a "here's a pull request to take!" type
request like you did here, that's not nice.

thanks,

greg k-h
Parav Pandit Dec. 16, 2019, 3:48 a.m. UTC | #5
Hi Greg,

On 12/10/2019 9:09 PM, Greg KH wrote:
> On Mon, Dec 09, 2019 at 02:49:19PM -0800, Jeff Kirsher wrote:
>> From: Shiraz Saleem <shiraz.saleem@intel.com>
>>
>> Register client virtbus device on the virtbus for the RDMA
>> virtbus driver (irdma) to bind to. It allows to realize a
>> single RDMA driver capable of working with multiple netdev
>> drivers over multi-generation Intel HW supporting RDMA.
>> There is also no load ordering dependencies between i40e and
>> irdma.
>>
>> Summary of changes:
>> * Support to add/remove virtbus devices
>> * Add 2 new client ops.
>> 	* i40e_client_device_register() which is called during RDMA
>> 	  probe() per PF. Validate client drv OPs and schedule service
>> 	  task to call open()
>> 	* i40e_client_device_unregister() called during RDMA remove()
>> 	  per PF. Call client close() and release_qvlist.
>> * The global register/unregister calls exported for i40iw are retained
>>   until i40iw is removed from the kernel.
>>
>> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
>> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
>> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
>> ---
>>  drivers/infiniband/hw/i40iw/Makefile          |   1 -
>>  drivers/infiniband/hw/i40iw/i40iw.h           |   2 +-
>>  drivers/net/ethernet/intel/Kconfig            |   1 +
>>  drivers/net/ethernet/intel/i40e/i40e.h        |   3 +-
>>  drivers/net/ethernet/intel/i40e/i40e_client.c | 109 +++++++++++++++---
>>  .../linux/net/intel}/i40e_client.h            |  20 +++-
>>  6 files changed, 112 insertions(+), 24 deletions(-)
>>  rename {drivers/net/ethernet/intel/i40e => include/linux/net/intel}/i40e_client.h (92%)
>>
>> diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
>> index 8942f8229945..34da9eba8a7c 100644
>> --- a/drivers/infiniband/hw/i40iw/Makefile
>> +++ b/drivers/infiniband/hw/i40iw/Makefile
>> @@ -1,5 +1,4 @@
>>  # SPDX-License-Identifier: GPL-2.0
>> -ccflags-y :=  -I $(srctree)/drivers/net/ethernet/intel/i40e
>>  
>>  obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
>>  
>> diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
>> index 8feec35f95a7..3197e3536d5c 100644
>> --- a/drivers/infiniband/hw/i40iw/i40iw.h
>> +++ b/drivers/infiniband/hw/i40iw/i40iw.h
>> @@ -57,7 +57,7 @@
>>  #include "i40iw_d.h"
>>  #include "i40iw_hmc.h"
>>  
>> -#include <i40e_client.h>
>> +#include <linux/net/intel/i40e_client.h>
>>  #include "i40iw_type.h"
>>  #include "i40iw_p.h"
>>  #include <rdma/i40iw-abi.h>
>> diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
>> index b88328fea1d0..8595f578fbe7 100644
>> --- a/drivers/net/ethernet/intel/Kconfig
>> +++ b/drivers/net/ethernet/intel/Kconfig
>> @@ -241,6 +241,7 @@ config I40E
>>  	tristate "Intel(R) Ethernet Controller XL710 Family support"
>>  	imply PTP_1588_CLOCK
>>  	depends on PCI
>> +	select VIRTUAL_BUS
>>  	---help---
>>  	  This driver supports Intel(R) Ethernet Controller XL710 Family of
>>  	  devices.  For more information on how to identify your adapter, go
>> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
>> index cb6367334ca7..4321e81d347c 100644
>> --- a/drivers/net/ethernet/intel/i40e/i40e.h
>> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
>> @@ -38,7 +38,7 @@
>>  #include <net/xdp_sock.h>
>>  #include "i40e_type.h"
>>  #include "i40e_prototype.h"
>> -#include "i40e_client.h"
>> +#include <linux/net/intel/i40e_client.h>
>>  #include <linux/avf/virtchnl.h>
>>  #include "i40e_virtchnl_pf.h"
>>  #include "i40e_txrx.h"
>> @@ -655,6 +655,7 @@ struct i40e_pf {
>>  	u16 last_sw_conf_valid_flags;
>>  	/* List to keep previous DDP profiles to be rolled back in the future */
>>  	struct list_head ddp_old_prof;
>> +	int peer_idx;
>>  };
>>  
>>  /**
>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
>> index e81530ca08d0..a3dee729719b 100644
>> --- a/drivers/net/ethernet/intel/i40e/i40e_client.c
>> +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
>> @@ -1,12 +1,12 @@
>>  // SPDX-License-Identifier: GPL-2.0
>>  /* Copyright(c) 2013 - 2018 Intel Corporation. */
>>  
>> +#include <linux/net/intel/i40e_client.h>
>>  #include <linux/list.h>
>>  #include <linux/errno.h>
>>  
>>  #include "i40e.h"
>>  #include "i40e_prototype.h"
>> -#include "i40e_client.h"
>>  
>>  static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
>>  static struct i40e_client *registered_client;
>> @@ -30,11 +30,17 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
>>  				       bool is_vf, u32 vf_id,
>>  				       u32 flag, u32 valid_flag);
>>  
>> +static int i40e_client_device_register(struct i40e_info *ldev);
>> +
>> +static void i40e_client_device_unregister(struct i40e_info *ldev);
>> +
>>  static struct i40e_ops i40e_lan_ops = {
>>  	.virtchnl_send = i40e_client_virtchnl_send,
>>  	.setup_qvlist = i40e_client_setup_qvlist,
>>  	.request_reset = i40e_client_request_reset,
>>  	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
>> +	.client_device_register = i40e_client_device_register,
>> +	.client_device_unregister = i40e_client_device_unregister,
>>  };
>>  
>>  /**
>> @@ -275,6 +281,27 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
>>  	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
>>  }
>>  
>> +static int i40e_init_client_virtdev(struct i40e_pf *pf)
>> +{
>> +	struct i40e_info *ldev = &pf->cinst->lan_info;
>> +	struct pci_dev *pdev = pf->pdev;
>> +	struct virtbus_device *vdev;
>> +	int ret;
>> +
>> +	vdev = &ldev->vdev;
>> +	vdev->name = I40E_PEER_RDMA_NAME;
>> +	vdev->dev.parent = &pf->pdev->dev;
> 
> What a total and complete mess of a tangled web you just wove here.
> 
> Ok, so you pass in a single pointer, that then dereferences 3 pointers
> deep to find the pointer to the virtbus_device structure, but then you
> point the parent of that device, back at the original structure's
> sub-pointer's device itself.
> 
> WTF?
> 
> And who owns the memory of this thing that is supposed to be
> dynamically controlled by something OUTSIDE of this driver?  Who created
> that thing 3 pointers deep?  What happens when you leak the memory below
> (hint, you did), and who is supposed to clean it up if you need to
> properly clean it up if something bad happens?
> 
>> +
>> +	ret = virtbus_dev_register(vdev);
>> +	if (ret) {
>> +		dev_err(&pdev->dev, "Failure adding client virtbus dev %s %d\n",
>> +			I40E_PEER_RDMA_NAME, ret);
> 
> Again, the core should handle this, right?
> 
>> +		return ret;
> 
> Did you just leak memory?
> 
> Yup, you did, you never actually checked the return value of this
> function :(
> 
> Ugh.
> 
> I feel like the virtual bus code is getting better, but this use of the
> code, um, no, not ok.
> 
> Either way, this series is NOT ready to be merged anywhere, please do
> not try to rush things.
> 
> Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
> requirement between this group, and the other group trying to do the
> same thing?  I want to see signed-off-by from EVERYONE involved before
> we are going to consider this thing.

I am working on RFC where PCI device is sliced to create sub-functions.
Each sub-function/slice is created dynamically by the user.
User gives sf-number at creation time which will be used for plumbing by
systemd/udev, devlink ports.

This sub-function will have sysfs attributes = sfnumber, irq vectors,
PCI BAR resource files.
sfnumber as sysfs file will be used by systemd/udev to have
deterministic names of netdev and rdma device created on top of
sub-function's 'struct device'.

As opposed to that, matching service devices won't have such attributes.

We stayed away from using mdev bus for such dual purpose in past.

Should we have virtbus that holds 'struct device' created for different
purpose and have different sysfs attributes? Is it ok?

Your opinion will help us do it correctly.
Greg KH Dec. 16, 2019, 7:15 a.m. UTC | #6
On Mon, Dec 16, 2019 at 03:48:05AM +0000, Parav Pandit wrote:
> Hi Greg,
> 
> On 12/10/2019 9:09 PM, Greg KH wrote:
> > On Mon, Dec 09, 2019 at 02:49:19PM -0800, Jeff Kirsher wrote:
> >> From: Shiraz Saleem <shiraz.saleem@intel.com>
> >>
> >> Register client virtbus device on the virtbus for the RDMA
> >> virtbus driver (irdma) to bind to. It allows to realize a
> >> single RDMA driver capable of working with multiple netdev
> >> drivers over multi-generation Intel HW supporting RDMA.
> >> There is also no load ordering dependencies between i40e and
> >> irdma.
> >>
> >> Summary of changes:
> >> * Support to add/remove virtbus devices
> >> * Add 2 new client ops.
> >> 	* i40e_client_device_register() which is called during RDMA
> >> 	  probe() per PF. Validate client drv OPs and schedule service
> >> 	  task to call open()
> >> 	* i40e_client_device_unregister() called during RDMA remove()
> >> 	  per PF. Call client close() and release_qvlist.
> >> * The global register/unregister calls exported for i40iw are retained
> >>   until i40iw is removed from the kernel.
> >>
> >> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
> >> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
> >> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> >> ---
> >>  drivers/infiniband/hw/i40iw/Makefile          |   1 -
> >>  drivers/infiniband/hw/i40iw/i40iw.h           |   2 +-
> >>  drivers/net/ethernet/intel/Kconfig            |   1 +
> >>  drivers/net/ethernet/intel/i40e/i40e.h        |   3 +-
> >>  drivers/net/ethernet/intel/i40e/i40e_client.c | 109 +++++++++++++++---
> >>  .../linux/net/intel}/i40e_client.h            |  20 +++-
> >>  6 files changed, 112 insertions(+), 24 deletions(-)
> >>  rename {drivers/net/ethernet/intel/i40e => include/linux/net/intel}/i40e_client.h (92%)
> >>
> >> diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
> >> index 8942f8229945..34da9eba8a7c 100644
> >> --- a/drivers/infiniband/hw/i40iw/Makefile
> >> +++ b/drivers/infiniband/hw/i40iw/Makefile
> >> @@ -1,5 +1,4 @@
> >>  # SPDX-License-Identifier: GPL-2.0
> >> -ccflags-y :=  -I $(srctree)/drivers/net/ethernet/intel/i40e
> >>  
> >>  obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
> >>  
> >> diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
> >> index 8feec35f95a7..3197e3536d5c 100644
> >> --- a/drivers/infiniband/hw/i40iw/i40iw.h
> >> +++ b/drivers/infiniband/hw/i40iw/i40iw.h
> >> @@ -57,7 +57,7 @@
> >>  #include "i40iw_d.h"
> >>  #include "i40iw_hmc.h"
> >>  
> >> -#include <i40e_client.h>
> >> +#include <linux/net/intel/i40e_client.h>
> >>  #include "i40iw_type.h"
> >>  #include "i40iw_p.h"
> >>  #include <rdma/i40iw-abi.h>
> >> diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
> >> index b88328fea1d0..8595f578fbe7 100644
> >> --- a/drivers/net/ethernet/intel/Kconfig
> >> +++ b/drivers/net/ethernet/intel/Kconfig
> >> @@ -241,6 +241,7 @@ config I40E
> >>  	tristate "Intel(R) Ethernet Controller XL710 Family support"
> >>  	imply PTP_1588_CLOCK
> >>  	depends on PCI
> >> +	select VIRTUAL_BUS
> >>  	---help---
> >>  	  This driver supports Intel(R) Ethernet Controller XL710 Family of
> >>  	  devices.  For more information on how to identify your adapter, go
> >> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> >> index cb6367334ca7..4321e81d347c 100644
> >> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> >> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> >> @@ -38,7 +38,7 @@
> >>  #include <net/xdp_sock.h>
> >>  #include "i40e_type.h"
> >>  #include "i40e_prototype.h"
> >> -#include "i40e_client.h"
> >> +#include <linux/net/intel/i40e_client.h>
> >>  #include <linux/avf/virtchnl.h>
> >>  #include "i40e_virtchnl_pf.h"
> >>  #include "i40e_txrx.h"
> >> @@ -655,6 +655,7 @@ struct i40e_pf {
> >>  	u16 last_sw_conf_valid_flags;
> >>  	/* List to keep previous DDP profiles to be rolled back in the future */
> >>  	struct list_head ddp_old_prof;
> >> +	int peer_idx;
> >>  };
> >>  
> >>  /**
> >> diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
> >> index e81530ca08d0..a3dee729719b 100644
> >> --- a/drivers/net/ethernet/intel/i40e/i40e_client.c
> >> +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
> >> @@ -1,12 +1,12 @@
> >>  // SPDX-License-Identifier: GPL-2.0
> >>  /* Copyright(c) 2013 - 2018 Intel Corporation. */
> >>  
> >> +#include <linux/net/intel/i40e_client.h>
> >>  #include <linux/list.h>
> >>  #include <linux/errno.h>
> >>  
> >>  #include "i40e.h"
> >>  #include "i40e_prototype.h"
> >> -#include "i40e_client.h"
> >>  
> >>  static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
> >>  static struct i40e_client *registered_client;
> >> @@ -30,11 +30,17 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
> >>  				       bool is_vf, u32 vf_id,
> >>  				       u32 flag, u32 valid_flag);
> >>  
> >> +static int i40e_client_device_register(struct i40e_info *ldev);
> >> +
> >> +static void i40e_client_device_unregister(struct i40e_info *ldev);
> >> +
> >>  static struct i40e_ops i40e_lan_ops = {
> >>  	.virtchnl_send = i40e_client_virtchnl_send,
> >>  	.setup_qvlist = i40e_client_setup_qvlist,
> >>  	.request_reset = i40e_client_request_reset,
> >>  	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
> >> +	.client_device_register = i40e_client_device_register,
> >> +	.client_device_unregister = i40e_client_device_unregister,
> >>  };
> >>  
> >>  /**
> >> @@ -275,6 +281,27 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
> >>  	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
> >>  }
> >>  
> >> +static int i40e_init_client_virtdev(struct i40e_pf *pf)
> >> +{
> >> +	struct i40e_info *ldev = &pf->cinst->lan_info;
> >> +	struct pci_dev *pdev = pf->pdev;
> >> +	struct virtbus_device *vdev;
> >> +	int ret;
> >> +
> >> +	vdev = &ldev->vdev;
> >> +	vdev->name = I40E_PEER_RDMA_NAME;
> >> +	vdev->dev.parent = &pf->pdev->dev;
> > 
> > What a total and complete mess of a tangled web you just wove here.
> > 
> > Ok, so you pass in a single pointer, that then dereferences 3 pointers
> > deep to find the pointer to the virtbus_device structure, but then you
> > point the parent of that device, back at the original structure's
> > sub-pointer's device itself.
> > 
> > WTF?
> > 
> > And who owns the memory of this thing that is supposed to be
> > dynamically controlled by something OUTSIDE of this driver?  Who created
> > that thing 3 pointers deep?  What happens when you leak the memory below
> > (hint, you did), and who is supposed to clean it up if you need to
> > properly clean it up if something bad happens?
> > 
> >> +
> >> +	ret = virtbus_dev_register(vdev);
> >> +	if (ret) {
> >> +		dev_err(&pdev->dev, "Failure adding client virtbus dev %s %d\n",
> >> +			I40E_PEER_RDMA_NAME, ret);
> > 
> > Again, the core should handle this, right?
> > 
> >> +		return ret;
> > 
> > Did you just leak memory?
> > 
> > Yup, you did, you never actually checked the return value of this
> > function :(
> > 
> > Ugh.
> > 
> > I feel like the virtual bus code is getting better, but this use of the
> > code, um, no, not ok.
> > 
> > Either way, this series is NOT ready to be merged anywhere, please do
> > not try to rush things.
> > 
> > Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
> > requirement between this group, and the other group trying to do the
> > same thing?  I want to see signed-off-by from EVERYONE involved before
> > we are going to consider this thing.
> 
> I am working on RFC where PCI device is sliced to create sub-functions.
> Each sub-function/slice is created dynamically by the user.
> User gives sf-number at creation time which will be used for plumbing by
> systemd/udev, devlink ports.

That sounds exactly what is wanted here as well, right?

> This sub-function will have sysfs attributes = sfnumber, irq vectors,
> PCI BAR resource files.
> sfnumber as sysfs file will be used by systemd/udev to have
> deterministic names of netdev and rdma device created on top of
> sub-function's 'struct device'.
> 
> As opposed to that, matching service devices won't have such attributes.
> 
> We stayed away from using mdev bus for such dual purpose in past.

That is good.

> Should we have virtbus that holds 'struct device' created for different
> purpose and have different sysfs attributes? Is it ok?

That's fine to do, I was expecting that to happen.

thanks,

greg k-h
Parav Pandit Dec. 16, 2019, 8:36 a.m. UTC | #7
On 12/16/2019 12:45 PM, Greg KH wrote:
> On Mon, Dec 16, 2019 at 03:48:05AM +0000, Parav Pandit wrote:
[..]
>>> I feel like the virtual bus code is getting better, but this use of the
>>> code, um, no, not ok.
>>>
>>> Either way, this series is NOT ready to be merged anywhere, please do
>>> not try to rush things.
>>>
>>> Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
>>> requirement between this group, and the other group trying to do the
>>> same thing?  I want to see signed-off-by from EVERYONE involved before
>>> we are going to consider this thing.
>>
>> I am working on RFC where PCI device is sliced to create sub-functions.
>> Each sub-function/slice is created dynamically by the user.
>> User gives sf-number at creation time which will be used for plumbing by
>> systemd/udev, devlink ports.
> 
> That sounds exactly what is wanted here as well, right?

Not exactly.
Here, in i40 use case - there is a PCI function.
This PCI function is used by two drivers:
(1) vendor_foo_netdev.ko creating Netdevice (class net)
(2) vendor_foo_rdma.ko creating RDMA device (class infiniband)

And both drivers are notified using matching service virtbus, which
attempts to create two virtbus_devices with different driver-id, one
for each class of device.

However, devices of both class (net, infiniband) will have parent device
as PCI device.

In case of sub-functions, the created rdma and netdevice will have as their
parent the sub-function 'struct device'. This way those SFs get their
systemd/udev plumbing done correctly.

> 
>> This sub-function will have sysfs attributes = sfnumber, irq vectors,
>> PCI BAR resource files.
>> sfnumber as sysfs file will be used by systemd/udev to have
>> deterministic names of netdev and rdma device created on top of
>> sub-function's 'struct device'.
>>
>> As opposed to that, matching service devices won't have such attributes.
>>
>> We stayed away from using mdev bus for such dual purpose in past.
> 
> That is good.
> 
>> Should we have virtbus that holds 'struct device' created for different
>> purpose and have different sysfs attributes? Is it ok?
> 
> That's fine to do, I was expecting that to happen.
> 
ok. Thanks a lot.
Let's also understand the additional (non-sysfs) difference above in how
the virtbus device is used differently for sub-functions versus
matching-service purposes.
Greg KH Dec. 16, 2019, 8:58 a.m. UTC | #8
On Mon, Dec 16, 2019 at 08:36:02AM +0000, Parav Pandit wrote:
> On 12/16/2019 12:45 PM, Greg KH wrote:
> > On Mon, Dec 16, 2019 at 03:48:05AM +0000, Parav Pandit wrote:
> [..]
> >>> I feel like the virtual bus code is getting better, but this use of the
> >>> code, um, no, not ok.
> >>>
> >>> Either way, this series is NOT ready to be merged anywhere, please do
> >>> not try to rush things.
> >>>
> >>> Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
> >>> requirement between this group, and the other group trying to do the
> >>> same thing?  I want to see signed-off-by from EVERYONE involved before
> >>> we are going to consider this thing.
> >>
> >> I am working on RFC where PCI device is sliced to create sub-functions.
> >> Each sub-function/slice is created dynamically by the user.
> >> User gives sf-number at creation time which will be used for plumbing by
> >> systemd/udev, devlink ports.
> > 
> > That sounds exactly what is wanted here as well, right?
> 
> Not exactly.
> Here, in i40 use case - there is a PCI function.
> This PCI function is used by two drivers:
> (1) vendor_foo_netdev.ko creating Netdevice (class net)
> (2) vendor_foo_rdma.ko creating RDMA device (class infiniband)
> 
> And both drivers are notified using matching service virtbus, which
> attempts to create to two virtbus_devices with different driver-id, one
> for each class of device.

Yes, that is fine.

> However, devices of both class (net, infiniband) will have parent device
> as PCI device.

That is fine.

> In case of sub-functions, created rdma and netdevice will have parent as
> the sub-function 'struct device'. This way those SFs gets their
> systemd/udev plumbing done rightly.

huh?  The rdma and netdevice will have as their parent device the
virtdevice that is on the virtbus.  Not the PCI device's 'struct
device'.

thanks,

greg k-h
Parav Pandit Dec. 16, 2019, 9:17 a.m. UTC | #9
On 12/16/2019 2:28 PM, Greg KH wrote:
> On Mon, Dec 16, 2019 at 08:36:02AM +0000, Parav Pandit wrote:
>> On 12/16/2019 12:45 PM, Greg KH wrote:
>>> On Mon, Dec 16, 2019 at 03:48:05AM +0000, Parav Pandit wrote:
>> [..]
>>>>> I feel like the virtual bus code is getting better, but this use of the
>>>>> code, um, no, not ok.
>>>>>
>>>>> Either way, this series is NOT ready to be merged anywhere, please do
>>>>> not try to rush things.
>>>>>
>>>>> Also, what ever happened to my "YOU ALL MUST AGREE TO WORK TOGETHER"
>>>>> requirement between this group, and the other group trying to do the
>>>>> same thing?  I want to see signed-off-by from EVERYONE involved before
>>>>> we are going to consider this thing.
>>>>
>>>> I am working on RFC where PCI device is sliced to create sub-functions.
>>>> Each sub-function/slice is created dynamically by the user.
>>>> User gives sf-number at creation time which will be used for plumbing by
>>>> systemd/udev, devlink ports.
>>>
>>> That sounds exactly what is wanted here as well, right?
>>
>> Not exactly.
>> Here, in i40 use case - there is a PCI function.
>> This PCI function is used by two drivers:
>> (1) vendor_foo_netdev.ko creating Netdevice (class net)
>> (2) vendor_foo_rdma.ko creating RDMA device (class infiniband)
>>
>> And both drivers are notified using matching service virtbus, which
>> attempts to create to two virtbus_devices with different driver-id, one
>> for each class of device.
> 
> Yes, that is fine.
> 
>> However, devices of both class (net, infiniband) will have parent device
>> as PCI device.
> 
> That is fine.
> 
>> In case of sub-functions, created rdma and netdevice will have parent as
>> the sub-function 'struct device'. This way those SFs gets their
>> systemd/udev plumbing done rightly.
> 
> huh?  The rdma and netdevice will have as their parent device the
> virtdevice that is on the virtbus.  Not the PCI device's 'struct
> device'.
> 
Yes. I meant the same when I said "sub-function 'struct device'", which is
nothing but a virtdevice.

ok. Great. We are on same page now.

As we discussed, if sub-functions use the bus, they will use the virtbus
and virtdevice with the differences discussed above.
Once I finish the internal RFC review for sub-functions, I will post it on
the list.
I do not expect the RFC to be finished before the Christmas holidays.

Thanks,
Parav

> thanks,
> 
> greg k-h
>
Shiraz Saleem Dec. 18, 2019, 6:57 p.m. UTC | #10
> > > Subject: Re: [PATCH v3 04/20] i40e: Register a virtbus device to
> > > provide RDMA
[.....]

> > >
> > > And who owns the memory of this thing that is supposed to be
> > > dynamically controlled by something OUTSIDE of this driver?  Who
> > > created that thing 3 pointers deep?  What happens when you leak the
> > > memory below (hint, you did), and who is supposed to clean it up if
> > > you need to properly clean it up if something bad happens?
> >
> > The i40e_info object memory is tied to the PF driver.
> 
> What is a "PF"?

physical function.

> 
> > The object hierarchy is,
> >
> > i40e_pf: pointer to i40e_client_instance
> > 	----- i40e_client_instance: i40e_info
> > 		----- i40e_info: virtbus_device
> 
> So you are 3 pointers deep to get a structure that is dynamically controlled?  Why
> are those "3 pointers" not also represented in sysfs?
> You have a hierarchy within the kernel that is not being represented that way to
> userspace, why?
> 
> Hint, I think this is totally wrong, you need to rework this to be sane.
> 
> > For each PF, there is a client_instance object allocated.
> 
> Great, make it dynamic and in the device tree.
> 
> > The i40e_info object is populated and the virtbus_device hanging off this object
> is registered.
> 
> Great, make that dynamic and in the device tree.
> 
> If you think this is too much, then your whole mess here is too much and needs to
> be made a lot simpler.
>

I think we can decouple the virtbus_device object from i40e_info object.

Instead, allocate an i40e_virtbus_device object which contains
the virtbus_device and a pointer to the i40e_info object for the
RDMA driver to consume on probe(). Register it on the virtbus, and provide
a release callback to free up its memory.

Sending a patch snippet to hopefully make it clearer.

diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index b80ffaf..c6cf2eb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -281,6 +281,42 @@ void i40e_client_update_msix_info(struct i40e_pf *pf)
 	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
 }
 
+static void i40e_virtdev_release(struct device *dev)
+{
+	struct virtbus_device *vdev = container_of(dev, struct virtbus_device, dev);
+	struct i40e_virtbus_device *i40e_vdev =
+		container_of(vdev, struct i40e_virtbus_device, vdev);
+
+	kfree(i40e_vdev);
+}
+
+static int i40e_init_client_virtdev(struct i40e_info *ldev)
+{
+	struct pci_dev *pdev = ldev->pcidev;
+	struct i40e_virtbus_device *i40e_vdev;
+	int ret;
+
+	i40e_vdev = kzalloc(sizeof(*i40e_vdev), GFP_KERNEL);
+	if (!i40e_vdev)
+		return -ENOMEM;
+
+	i40e_vdev->vdev.name = I40E_PEER_RDMA_NAME;
+	i40e_vdev->vdev.dev.parent = &pdev->dev;
+	i40e_vdev->vdev.dev.release = i40e_virtdev_release;
+	i40e_vdev->ldev = ldev;
+
+	ret = virtbus_dev_register(&i40e_vdev->vdev);
+	if (ret)
+		/* TBD: Assuming virtbus_dev_register() does put_device
+		 * on err clean-up
+		 */
+		return ret;
+
+	ldev->vdev = &i40e_vdev->vdev;
+
+	return 0;
+}
+
 /**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
@@ -313,11 +349,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 	cdev->lan_info.fw_build = pf->hw.aq.fw_build;
 	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
 
-	if (i40e_client_get_params(vsi, &cdev->lan_info.params)) {
-		kfree(cdev);
-		cdev = NULL;
-		return;
-	}
+	if (i40e_client_get_params(vsi, &cdev->lan_info.params))
+		goto free_cdev;
 
 	mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
 			       struct netdev_hw_addr, list);
@@ -332,6 +365,14 @@ static void i40e_client_add_instance(struct i40e_pf *pf)
 	cdev->lan_info.msix_count = pf->num_iwarp_msix;
 	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
 
+	if (i40e_init_client_virtdev(&cdev->lan_info))
+		goto free_cdev;
+
+	return;
+
+free_cdev:
+	kfree(cdev);
+	cdev = NULL;
 }
 
 /**
@@ -452,7 +493,7 @@ int i40e_lan_del_device(struct i40e_pf *pf)
 	struct i40e_device *ldev, *tmp;
 	int ret = -ENODEV;
 
-	virtbus_dev_unregister(&pf->cinst->lan_info.vdev);
+	virtbus_dev_unregister(pf->cinst->lan_info.vdev);
 
 	/* First, remove any client instance. */
 	i40e_client_del_instance(pf);
diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h
index 7e147d3..5c81261 100644
--- a/include/linux/net/intel/i40e_client.h
+++ b/include/linux/net/intel/i40e_client.h
@@ -83,11 +83,11 @@ struct i40e_params {
 
 /* Structure to hold Lan device info for a client device */
 struct i40e_info {
 	struct i40e_client_version version;
 	u8 lanmac[6];
 	struct net_device *netdev;
 	struct pci_dev *pcidev;
+	struct virtbus_device *vdev;
 	u8 __iomem *hw_addr;
 	u8 fid;	/* function id, PF id or VF id */
 #define I40E_CLIENT_FTYPE_PF 0
@@ -112,6 +112,11 @@ struct i40e_info {
 	u32 fw_build;                   /* firmware build number */
 };
 
+struct i40e_virtbus_device {
+	struct virtbus_device vdev;
+	struct i40e_info *ldev;
+};
+

Is this more in line with correct usage and what you're expecting?

Shiraz
Jason Gunthorpe Dec. 18, 2019, 7:20 p.m. UTC | #11
On Wed, Dec 18, 2019 at 06:57:10PM +0000, Saleem, Shiraz wrote:
> diff --git a/include/linux/net/intel/i40e_client.h b/include/linux/net/intel/i40e_client.h
> index 7e147d3..5c81261 100644
> +++ b/include/linux/net/intel/i40e_client.h
> @@ -83,11 +83,11 @@ struct i40e_params {
>  
>  /* Structure to hold Lan device info for a client device */
>  struct i40e_info {
>  	struct i40e_client_version version;

I hope this isn't the inter-module versioning stuff we already Nak'd?

>  	u8 lanmac[6];

Is this different from the mac reachable from the netdev?

>  	struct net_device *netdev;
>  	struct pci_dev *pcidev;
> +	struct virtbus_device *vdev;

If there is only one of these per virtbus_device then why do we need
to split the structure?

>  	u8 __iomem *hw_addr;
>  	u8 fid;	/* function id, PF id or VF id */
>  #define I40E_CLIENT_FTYPE_PF 0
> @@ -112,6 +112,11 @@ struct i40e_info {
>  	u32 fw_build;                   /* firmware build number */
>  };
>  
> +struct i40e_virtbus_device {
> +	struct virtbus_device vdev;
> +	struct i40e_info *ldev;

Is the lifetime actually any better? Will ldev be freed and left
dangling before virtbus_device is released?

Jason
Greg KH Dec. 19, 2019, 8:46 a.m. UTC | #12
On Wed, Dec 18, 2019 at 06:57:10PM +0000, Saleem, Shiraz wrote:
> > > > Subject: Re: [PATCH v3 04/20] i40e: Register a virtbus device to
> > > > provide RDMA
> [.....]
> 
> > > >
> > > > And who owns the memory of this thing that is supposed to be
> > > > dynamically controlled by something OUTSIDE of this driver?  Who
> > > > created that thing 3 pointers deep?  What happens when you leak the
> > > > memory below (hint, you did), and who is supposed to clean it up if
> > > > you need to properly clean it up if something bad happens?
> > >
> > > The i40e_info object memory is tied to the PF driver.
> > 
> > What is a "PF"?
> 
> physical function.
> 
> > 
> > > The object hierarchy is,
> > >
> > > i40e_pf: pointer to i40e_client_instance
> > > 	----- i40e_client_instance: i40e_info
> > > 		----- i40e_info: virtbus_device
> > 
> > So you are 3 pointers deep to get a structure that is dynamically controlled?  Why
> > are those "3 pointers" not also represented in sysfs?
> > You have a hierarchy within the kernel that is not being represented that way to
> > userspace, why?
> > 
> > Hint, I think this is totally wrong, you need to rework this to be sane.
> > 
> > > For each PF, there is a client_instance object allocated.
> > 
> > Great, make it dynamic and in the device tree.
> > 
> > > The i40e_info object is populated and the virtbus_device hanging off this object
> > is registered.
> > 
> > Great, make that dynamic and in the device tree.
> > 
> > If you think this is too much, then your whole mess here is too much and needs to
> > be made a lot simpler.
> >
> 
> I think we can decouple the virtbus_device object from i40e_info object.
> 
> Instead allocate a i40e_virtbus_device object which contains
> the virtbus_device and a pointer to i40e_info object for the
> RDMA driver to consume on probe(). Register it the virtbus, and provide
> a release callback to free up its memory.
> 
> Sending a patch snippet to hopefully make it clearer.

Yes, this looks a little bit more sane.

greg k-h
Shiraz Saleem Jan. 2, 2020, 4:01 p.m. UTC | #13
> Subject: Re: [PATCH v3 04/20] i40e: Register a virtbus device to provide RDMA
> 
> On Wed, Dec 18, 2019 at 06:57:10PM +0000, Saleem, Shiraz wrote:
> > diff --git a/include/linux/net/intel/i40e_client.h
> > b/include/linux/net/intel/i40e_client.h
> > index 7e147d3..5c81261 100644
> > +++ b/include/linux/net/intel/i40e_client.h
> > @@ -83,11 +83,11 @@ struct i40e_params {
> >
> >  /* Structure to hold Lan device info for a client device */  struct
> > i40e_info {
> >  	struct i40e_client_version version;
> 
> I hope this isn't the inter-module versioning stuff we already Nak'd?

This is not used in irdma for checking any versioning.

This and the version in struct i40e_client are not cleaned up because they are used
between i40e <--> i40iw. And the thought was once i40iw is removed, we can send
a follow on patch to remove this versioning + the global
exported functions i40e_register_client() and i40e_unregister_client() used by i40iw. 

For the ice driver, we have already removed all versioning.

> 
> >  	u8 lanmac[6];
> 
> Is this different from the mac reachable from the netdev?

Hmmm... I think not. This should be possible to get from netdev. We can send a cleanup follow on patch.


> > +struct i40e_virtbus_device {
> > +	struct virtbus_device vdev;
> > +	struct i40e_info *ldev;
> 
> > Is the lifetime actually any better? Will ldev be freed and left dangling before
> virtbus_device is released?
> 
The consumer of the ldev (i.e the rdma driver) will have a
valid ldev when drv->remove() is called as part of the virtbus
device unregister.
It is probably good to set this ldev pointer inside the i40e_virtbus_device
object to NULL in the netdev driver soon after virtbus device unregister.
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
index 8942f8229945..34da9eba8a7c 100644
--- a/drivers/infiniband/hw/i40iw/Makefile
+++ b/drivers/infiniband/hw/i40iw/Makefile
@@ -1,5 +1,4 @@ 
 # SPDX-License-Identifier: GPL-2.0
-ccflags-y :=  -I $(srctree)/drivers/net/ethernet/intel/i40e
 
 obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8feec35f95a7..3197e3536d5c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -57,7 +57,7 @@ 
 #include "i40iw_d.h"
 #include "i40iw_hmc.h"
 
-#include <i40e_client.h>
+#include <linux/net/intel/i40e_client.h>
 #include "i40iw_type.h"
 #include "i40iw_p.h"
 #include <rdma/i40iw-abi.h>
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index b88328fea1d0..8595f578fbe7 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -241,6 +241,7 @@  config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	imply PTP_1588_CLOCK
 	depends on PCI
+	select VIRTUAL_BUS
 	---help---
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
 	  devices.  For more information on how to identify your adapter, go
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index cb6367334ca7..4321e81d347c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -38,7 +38,7 @@ 
 #include <net/xdp_sock.h>
 #include "i40e_type.h"
 #include "i40e_prototype.h"
-#include "i40e_client.h"
+#include <linux/net/intel/i40e_client.h>
 #include <linux/avf/virtchnl.h>
 #include "i40e_virtchnl_pf.h"
 #include "i40e_txrx.h"
@@ -655,6 +655,7 @@  struct i40e_pf {
 	u16 last_sw_conf_valid_flags;
 	/* List to keep previous DDP profiles to be rolled back in the future */
 	struct list_head ddp_old_prof;
+	int peer_idx;
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index e81530ca08d0..a3dee729719b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -1,12 +1,12 @@ 
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2013 - 2018 Intel Corporation. */
 
+#include <linux/net/intel/i40e_client.h>
 #include <linux/list.h>
 #include <linux/errno.h>
 
 #include "i40e.h"
 #include "i40e_prototype.h"
-#include "i40e_client.h"
 
 static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
 static struct i40e_client *registered_client;
@@ -30,11 +30,17 @@  static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
 				       bool is_vf, u32 vf_id,
 				       u32 flag, u32 valid_flag);
 
+static int i40e_client_device_register(struct i40e_info *ldev);
+
+static void i40e_client_device_unregister(struct i40e_info *ldev);
+
 static struct i40e_ops i40e_lan_ops = {
 	.virtchnl_send = i40e_client_virtchnl_send,
 	.setup_qvlist = i40e_client_setup_qvlist,
 	.request_reset = i40e_client_request_reset,
 	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
+	.client_device_register = i40e_client_device_register,
+	.client_device_unregister = i40e_client_device_unregister,
 };
 
 /**
@@ -275,6 +281,27 @@  void i40e_client_update_msix_info(struct i40e_pf *pf)
 	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
 }
 
+static int i40e_init_client_virtdev(struct i40e_pf *pf)
+{
+	struct i40e_info *ldev = &pf->cinst->lan_info;
+	struct pci_dev *pdev = pf->pdev;
+	struct virtbus_device *vdev;
+	int ret;
+
+	vdev = &ldev->vdev;
+	vdev->name = I40E_PEER_RDMA_NAME;
+	vdev->dev.parent = &pf->pdev->dev;
+
+	ret = virtbus_dev_register(vdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failure adding client virtbus dev %s %d\n",
+			I40E_PEER_RDMA_NAME, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
@@ -288,9 +315,6 @@  static void i40e_client_add_instance(struct i40e_pf *pf)
 	struct netdev_hw_addr *mac = NULL;
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 
-	if (!registered_client || pf->cinst)
-		return;
-
 	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
 	if (!cdev)
 		return;
@@ -326,7 +350,11 @@  static void i40e_client_add_instance(struct i40e_pf *pf)
 	cdev->client = registered_client;
 	pf->cinst = cdev;
 
-	i40e_client_update_msix_info(pf);
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+
+	i40e_init_client_virtdev(pf);
+	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
 }
 
 /**
@@ -347,7 +375,7 @@  void i40e_client_del_instance(struct i40e_pf *pf)
  **/
 void i40e_client_subtask(struct i40e_pf *pf)
 {
-	struct i40e_client *client = registered_client;
+	struct i40e_client *client;
 	struct i40e_client_instance *cdev;
 	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
 	int ret = 0;
@@ -361,9 +389,11 @@  void i40e_client_subtask(struct i40e_pf *pf)
 	    test_bit(__I40E_CONFIG_BUSY, pf->state))
 		return;
 
-	if (!client || !cdev)
+	if (!cdev || !cdev->client)
 		return;
 
+	client = cdev->client;
+
 	/* Here we handle client opens. If the client is down, and
 	 * the netdev is registered, then open the client.
 	 */
@@ -424,16 +454,8 @@  int i40e_lan_add_device(struct i40e_pf *pf)
 		 pf->hw.pf_id, pf->hw.bus.bus_id,
 		 pf->hw.bus.device, pf->hw.bus.func);
 
-	/* If a client has already been registered, we need to add an instance
-	 * of it to our new LAN device.
-	 */
-	if (registered_client)
-		i40e_client_add_instance(pf);
+	i40e_client_add_instance(pf);
 
-	/* Since in some cases register may have happened before a device gets
-	 * added, we can schedule a subtask to go initiate the clients if
-	 * they can be launched at probe time.
-	 */
 	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
 	i40e_service_event_schedule(pf);
 
@@ -453,6 +475,8 @@  int i40e_lan_del_device(struct i40e_pf *pf)
 	struct i40e_device *ldev, *tmp;
 	int ret = -ENODEV;
 
+	virtbus_dev_unregister(&pf->cinst->lan_info.vdev);
+
 	/* First, remove any client instance. */
 	i40e_client_del_instance(pf);
 
@@ -733,6 +757,59 @@  static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
 	return err;
 }
 
+static int i40e_client_device_register(struct i40e_info *ldev)
+{
+	struct i40e_client *client;
+	struct i40e_pf *pf;
+
+	if (!ldev) {
+		pr_err("Failed to reg client dev: ldev ptr NULL\n");
+		return -EINVAL;
+	}
+
+	client = ldev->client;
+	pf = ldev->pf;
+	if (!client) {
+		pr_err("Failed to reg client dev: client ptr NULL\n");
+		return -EINVAL;
+	}
+
+	if (!ldev->ops || !client->ops) {
+		pr_err("Failed to reg client dev: client dev peer_ops/ops NULL\n");
+		return -EINVAL;
+	}
+
+	pf->cinst->client = ldev->client;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	i40e_service_event_schedule(pf);
+
+	return 0;
+}
+
+static void i40e_client_device_unregister(struct i40e_info *ldev)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	while (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
+		usleep_range(500, 1000);
+
+	if (!cdev || !cdev->client || !cdev->client->ops ||
+	    !cdev->client->ops->close) {
+		dev_err(&pf->pdev->dev, "Cannot close client device\n");
+		return;
+	}
+	cdev->client->ops->close(&cdev->lan_info, cdev->client, false);
+	clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+	i40e_client_release_qvlist(&cdev->lan_info);
+	pf->cinst->client = NULL;
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+}
+
+/* Retain legacy global registration/unregistration calls till i40iw is
+ * deprecated from the kernel. The irdma unified driver does not use these
+ * exported symbols.
+ */
 /**
  * i40e_register_client - Register a i40e client driver with the L2 driver
  * @client: pointer to the i40e_client struct
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/include/linux/net/intel/i40e_client.h
similarity index 92%
rename from drivers/net/ethernet/intel/i40e/i40e_client.h
rename to include/linux/net/intel/i40e_client.h
index 72994baf4941..1ca9c1eae60e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/include/linux/net/intel/i40e_client.h
@@ -4,6 +4,9 @@ 
 #ifndef _I40E_CLIENT_H_
 #define _I40E_CLIENT_H_
 
+#include <linux/virtual_bus.h>
+
+#define I40E_PEER_RDMA_NAME	"i40e_rdma"
 #define I40E_CLIENT_STR_LENGTH 10
 
 /* Client interface version should be updated anytime there is a change in the
@@ -41,8 +44,8 @@  struct i40e_client;
  * In order for us to keep the interface simple, SW will define a
  * unique type value for AEQ.
  */
-#define I40E_QUEUE_TYPE_PE_AEQ  0x80
-#define I40E_QUEUE_INVALID_IDX	0xFFFF
+#define I40E_QUEUE_TYPE_PE_AEQ 0x80
+#define I40E_QUEUE_INVALID_IDX 0xFFFF
 
 struct i40e_qv_info {
 	u32 v_idx; /* msix_vector */
@@ -67,7 +70,7 @@  struct i40e_prio_qos_params {
 	u8 reserved;
 };
 
-#define I40E_CLIENT_MAX_USER_PRIORITY        8
+#define I40E_CLIENT_MAX_USER_PRIORITY 8
 /* Struct to hold Client QoS */
 struct i40e_qos_params {
 	struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
@@ -80,12 +83,13 @@  struct i40e_params {
 
 /* Structure to hold Lan device info for a client device */
 struct i40e_info {
+	struct virtbus_device vdev;
 	struct i40e_client_version version;
 	u8 lanmac[6];
 	struct net_device *netdev;
 	struct pci_dev *pcidev;
 	u8 __iomem *hw_addr;
-	u8 fid;	/* function id, PF id or VF id */
+	u8 fid; /* function id, PF id or VF id */
 #define I40E_CLIENT_FTYPE_PF 0
 #define I40E_CLIENT_FTYPE_VF 1
 	u8 ftype; /* function type, PF or VF */
@@ -97,8 +101,9 @@  struct i40e_info {
 	struct i40e_qvlist_info *qvlist_info;
 	struct i40e_params params;
 	struct i40e_ops *ops;
+	struct i40e_client *client;
 
-	u16 msix_count;	 /* number of msix vectors*/
+	u16 msix_count;  /* number of msix vectors*/
 	/* Array down below will be dynamically allocated based on msix_count */
 	struct msix_entry *msix_entries;
 	u16 itr_index; /* Which ITR index the PE driver is suppose to use */
@@ -132,6 +137,11 @@  struct i40e_ops {
 			       struct i40e_client *client,
 			       bool is_vf, u32 vf_id,
 			       u32 flag, u32 valid_flag);
+
+	int (*client_device_register)(struct i40e_info *ldev);
+
+	void (*client_device_unregister)(struct i40e_info *ldev);
+
 };
 
 struct i40e_client_ops {