diff mbox

[RFC,v1,for,accelerated,IPoIB,04/25] IB/verb: Add ipoib_options struct and API

Message ID 1489429896-10781-5-git-send-email-erezsh@mellanox.com (mailing list archive)
State RFC
Headers show

Commit Message

Erez Shitrit March 13, 2017, 6:31 p.m. UTC
The idea is to allow vendors to optimize IPoIB data path.
New struct that includes functions and data member is exposed.
It exposes set of callback functions for handling data path flows in IPoIB driver.
Each vendor can support these set of functions in order to optimize its
specific data path, and let IPoIB to leverage its data path.
The code of IPoIB driver was changed accordingly, and works in both ways
with vendor specific implementation and without.
There is an assumption, that vendors should give the full set of functions
and not only part of them, in order to work properly.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
---
 include/rdma/ib_ipoib_accel_ops.h | 59 +++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h           | 36 ++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 include/rdma/ib_ipoib_accel_ops.h

Comments

Jason Gunthorpe March 13, 2017, 8:01 p.m. UTC | #1
On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:

> diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
> new file mode 100644
> index 000000000000..148a5529a559
> +++ b/include/rdma/ib_ipoib_accel_ops.h

Both patches need a better naming scheme for this file..

rn_opa_vnic.h
rn_ipoib.h

Maybe?

> +struct rdma_netdev {
> +	void *clnt_priv;
> +
> +	/* control functions */
> +	void (*set_id)(struct net_device *netdev, int id);

> +	/* IB resource allocation function, returns new UD QP */
> +	int (*ib_dev_init)(struct net_device *dev, struct ib_device *hca,
> +			   int *qp_num);

Why can't some combination of alloc_rdma_netdev and ndo.open do this stuff?

> +	void (*ib_dev_cleanup)(struct net_device *dev, struct ib_device *hca);

Ditto

> +	/* send packet */
> +	void (*send)(struct net_device *dev, struct sk_buff *skb,
> +		     struct ipoib_ah *address, u32 dqpn, u32 dqkey);

> +	/* multicast */
> +	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
> +			    union ib_gid *gid, u16 lid, int set_qkey);
> +	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
> +			    union ib_gid *gid, u16 lid);

It would make more sense to store the struct ib_device pointer in the
struct rdma_netdev.

Should 'lid' be 'mlid'?

> +	int qp_num;

This one probably belongs in ipoib_rdma_netdev

> +	void *context;

What is this? Why is something other than ipoib_priv or ipoib_dev_priv
needed?


>  						struct ib_wq_attr *attr,
>  						u32 wq_attr_mask,
>  						struct ib_udata *udata);
> +	struct ib_ipoib_accel_ops * (*get_ipoib_accel_ops)(struct ib_device *device);

rebase error? Not sure how this compiles

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Niranjana Vishwanathapura March 14, 2017, 6:44 a.m. UTC | #2
On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:
>+struct ipoib_rdma_netdev {
>+	struct rdma_netdev rn;  /* keep this first */
>+	/* followed by device private data */
>+	char *dev_priv[0];
>+};
>+
>+static inline void *ipoib_priv(const struct net_device *dev)
>+{
>+	struct rdma_netdev *rn = netdev_priv(dev);
>+
>+	return rn->clnt_priv;
>+}
>+
>+static inline void *ipoib_dev_priv(const struct net_device *dev)
>+{
>+	struct ipoib_rdma_netdev *ipoib_rn = netdev_priv(dev);
>+
>+	return ipoib_rn->dev_priv;
>+}
>+

It can be confusing to see return of ipoib_priv() getting assigned to 
ipoib_dev_priv (legacy name). May be we should change ipoib_dev_priv() to 
ipoib_hw_priv()?


>+#endif /* IB_IPOIB_ACCEL_OPS_H */
>diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>index 85b9034c8cfc..9b090efccdba 100644
>--- a/include/rdma/ib_verbs.h
>+++ b/include/rdma/ib_verbs.h
>@@ -1901,6 +1901,41 @@ struct ib_port_immutable {
> 	u32                           max_mad_size;
> };
>
>+/* rdma netdev type - specifies protocol type */
>+enum rdma_netdev_t {
>+	RDMA_NETDEV_OPA_VNIC,
>+	RDMA_NETDEV_IPOIB
>+};
>+
>+struct ipoib_ah;
>+
>+/**
>+ * struct rdma_netdev - rdma netdev
>+ * For cases where netstack interfacing is required.
>+ */
>+struct rdma_netdev {
>+	void *clnt_priv;
>+
>+	/* control functions */
>+	void (*set_id)(struct net_device *netdev, int id);
>+	/* IB resource allocation function, returns new UD QP */
>+	int (*ib_dev_init)(struct net_device *dev, struct ib_device *hca,
>+			   int *qp_num);
>+	void (*ib_dev_cleanup)(struct net_device *dev, struct ib_device *hca);
>+
>+	/* send packet */
>+	void (*send)(struct net_device *dev, struct sk_buff *skb,
>+		     struct ipoib_ah *address, u32 dqpn, u32 dqkey);
>+
>+	/* multicast */
>+	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
>+			    union ib_gid *gid, u16 lid, int set_qkey);
>+	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
>+			    union ib_gid *gid, u16 lid);
>+	int qp_num;

May be ipoib_rdma_netdev structure is the right place for these functions?

>+	void *context;

No context should be necessary here.

>+};
>+
> struct ib_device {
> 	struct device                *dma_device;
>
>@@ -2149,6 +2184,7 @@ struct ib_device {
> 						struct ib_wq_attr *attr,
> 						u32 wq_attr_mask,
> 						struct ib_udata *udata);
>+	struct ib_ipoib_accel_ops * (*get_ipoib_accel_ops)(struct ib_device *device);

old code, needs fix.

Niranjana
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Niranjana Vishwanathapura March 14, 2017, 7:01 a.m. UTC | #3
On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
>> +	/* multicast */
>> +	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
>> +			    union ib_gid *gid, u16 lid, int set_qkey);
>> +	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
>> +			    union ib_gid *gid, u16 lid);
>
>It would make more sense to store the struct ib_device pointer in the
>struct rdma_netdev.
>

Agree that it shouldn't be a function parameters.
For opa_vnic, I found it convenient to store ib_device pointer in client and 
device private structures as those will be available in most places anyhow.

Niranjana
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Erez Shitrit March 14, 2017, 1:25 p.m. UTC | #4
On Mon, Mar 13, 2017 at 10:01 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:
>
>> diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
>> new file mode 100644
>> index 000000000000..148a5529a559
>> +++ b/include/rdma/ib_ipoib_accel_ops.h
>
> Both patches need a better naming scheme for this file..
>
> rn_opa_vnic.h
> rn_ipoib.h
>
> Maybe?

Can work for me.

vnic?

>
>> +struct rdma_netdev {
>> +     void *clnt_priv;
>> +
>> +     /* control functions */
>> +     void (*set_id)(struct net_device *netdev, int id);
>
>> +     /* IB resource allocation function, returns new UD QP */
>> +     int (*ib_dev_init)(struct net_device *dev, struct ib_device *hca,
>> +                        int *qp_num);
>
> Why can't some combination of alloc_rdma_netdev and ndo.open do this stuff?
>
>> +     void (*ib_dev_cleanup)(struct net_device *dev, struct ib_device *hca);
>
> Ditto
>
>> +     /* send packet */
>> +     void (*send)(struct net_device *dev, struct sk_buff *skb,
>> +                  struct ipoib_ah *address, u32 dqpn, u32 dqkey);
>
>> +     /* multicast */
>> +     int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
>> +                         union ib_gid *gid, u16 lid, int set_qkey);
>> +     int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
>> +                         union ib_gid *gid, u16 lid);
>
> It would make more sense to store the struct ib_device pointer in the
> struct rdma_netdev.

Yes, I can add the ib_device and the qp_number to the rdma_netdev, and
in that way will take out the extra parameters for these functions and
be able to use ndo_ops.

>
> Should 'lid' be 'mlid'?

yes. will change

>
>> +     int qp_num;
>
> This one probably belongs in ipoib_rdma_netdev
>
>> +     void *context;
>
> What is this? Why is something other than ipoib_priv or ipoib_dev_priv
> needed?

ipoib_priv is doesn't known in the lower layers, that context is used
to keep data (qp pointer in that case) that is not available
otherwise.

>
>
>>                                               struct ib_wq_attr *attr,
>>                                               u32 wq_attr_mask,
>>                                               struct ib_udata *udata);
>> +     struct ib_ipoib_accel_ops * (*get_ipoib_accel_ops)(struct ib_device *device);
>
> rebase error? Not sure how this compiles

yes, will take it out. (was compiled with that, probably because it is
not in used.)

>
> Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Erez Shitrit March 14, 2017, 1:25 p.m. UTC | #5
On Tue, Mar 14, 2017 at 9:01 AM, Vishwanathapura, Niranjana
<niranjana.vishwanathapura@intel.com> wrote:
> On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
>>>
>>> +       /* multicast */
>>> +       int (*attach_mcast)(struct net_device *dev, struct ib_device
>>> *hca,
>>> +                           union ib_gid *gid, u16 lid, int set_qkey);
>>> +       int (*detach_mcast)(struct net_device *dev, struct ib_device
>>> *hca,
>>> +                           union ib_gid *gid, u16 lid);
>>
>>
>> It would make more sense to store the struct ib_device pointer in the
>> struct rdma_netdev.
>>
>
> Agree that it shouldn't be a function parameters.
> For opa_vnic, I found it convenient to store ib_device pointer in client and
> device private structures as those will be available in most places anyhow.

Will add it to the rdma_netdev obj, as Jason suggested.
Thanks,

>
> Niranjana
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe March 14, 2017, 4:11 p.m. UTC | #6
On Tue, Mar 14, 2017 at 12:01:09AM -0700, Vishwanathapura, Niranjana wrote:
> On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
> >>+	/* multicast */
> >>+	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
> >>+			    union ib_gid *gid, u16 lid, int set_qkey);
> >>+	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
> >>+			    union ib_gid *gid, u16 lid);
> >
> >It would make more sense to store the struct ib_device pointer in the
> >struct rdma_netdev.
> >
> 
> Agree that it shouldn't be a function parameters.
> For opa_vnic, I found it convenient to store ib_device pointer in client and
> device private structures as those will be available in most places anyhow.

If vnic uses it too, then lets add the ib_device and port num to
rdma_netdev itself?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Niranjana Vishwanathapura March 14, 2017, 7:03 p.m. UTC | #7
On Tue, Mar 14, 2017 at 03:25:01PM +0200, Erez Shitrit wrote:
>On Mon, Mar 13, 2017 at 10:01 PM, Jason Gunthorpe
><jgunthorpe@obsidianresearch.com> wrote:
>> On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:
>>
>>> diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
>>> new file mode 100644
>>> index 000000000000..148a5529a559
>>> +++ b/include/rdma/ib_ipoib_accel_ops.h
>>
>> Both patches need a better naming scheme for this file..
>>
>> rn_opa_vnic.h
>> rn_ipoib.h
>>
>> Maybe?
>
>Can work for me.
>
>vnic?
>

I do see why it is an issue for ipoib (we don't want two ipoib.h files). We can 
add rn_ prefix for vnic, but probably opa_vnic.h is good enough.

>>> +     void *context;
>>
>> What is this? Why is something other than ipoib_priv or ipoib_dev_priv
>> needed?
>
>ipoib_priv is doesn't known in the lower layers, that context is used
>to keep data (qp pointer in that case) that is not available
>otherwise.
>

lower layers should be using ipoib_dev_priv() and no other context is needed. 
which patch is using this 'context'?

Niranjana
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky March 15, 2017, 6:27 a.m. UTC | #8
On Tue, Mar 14, 2017 at 12:03:01PM -0700, Vishwanathapura, Niranjana wrote:
> On Tue, Mar 14, 2017 at 03:25:01PM +0200, Erez Shitrit wrote:
> > On Mon, Mar 13, 2017 at 10:01 PM, Jason Gunthorpe
> > <jgunthorpe@obsidianresearch.com> wrote:
> > > On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:
> > >
> > > > diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
> > > > new file mode 100644
> > > > index 000000000000..148a5529a559
> > > > +++ b/include/rdma/ib_ipoib_accel_ops.h
> > >
> > > Both patches need a better naming scheme for this file..
> > >
> > > rn_opa_vnic.h
> > > rn_ipoib.h
> > >
> > > Maybe?
> >
> > Can work for me.
> >
> > vnic?
> >
>
> I do see why it is an issue for ipoib (we don't want two ipoib.h files). We
> can add rn_ prefix for vnic, but probably opa_vnic.h is good enough.

Just for the consistency, I would like to see the same "rn_" prefix
for both files (ipoib and opa_vnic).

Thanks
Leon Romanovsky March 15, 2017, 6:30 a.m. UTC | #9
On Tue, Mar 14, 2017 at 10:11:49AM -0600, Jason Gunthorpe wrote:
> On Tue, Mar 14, 2017 at 12:01:09AM -0700, Vishwanathapura, Niranjana wrote:
> > On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
> > >>+	/* multicast */
> > >>+	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
> > >>+			    union ib_gid *gid, u16 lid, int set_qkey);
> > >>+	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
> > >>+			    union ib_gid *gid, u16 lid);
> > >
> > >It would make more sense to store the struct ib_device pointer in the
> > >struct rdma_netdev.
> > >
> >
> > Agree that it shouldn't be a function parameters.
> > For opa_vnic, I found it convenient to store ib_device pointer in client and
> > device private structures as those will be available in most places anyhow.
>
> If vnic uses it too, then lets add the ib_device and port num to
> rdma_netdev itself?

Agree, at the end this rdma_netdev is intended for the drivers/infiniband
and it is better to have this binding (rdma_netdev and ib_device) as early as possible.

Thanks

>
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Niranjana Vishwanathapura March 15, 2017, 4:40 p.m. UTC | #10
On Wed, Mar 15, 2017 at 08:30:43AM +0200, Leon Romanovsky wrote:
>On Tue, Mar 14, 2017 at 10:11:49AM -0600, Jason Gunthorpe wrote:
>> On Tue, Mar 14, 2017 at 12:01:09AM -0700, Vishwanathapura, Niranjana wrote:
>> > On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
>> > >>+	/* multicast */
>> > >>+	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
>> > >>+			    union ib_gid *gid, u16 lid, int set_qkey);
>> > >>+	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
>> > >>+			    union ib_gid *gid, u16 lid);
>> > >
>> > >It would make more sense to store the struct ib_device pointer in the
>> > >struct rdma_netdev.
>> > >
>> >
>> > Agree that it shouldn't be a function parameters.
>> > For opa_vnic, I found it convenient to store ib_device pointer in client and
>> > device private structures as those will be available in most places anyhow.
>>
>> If vnic uses it too, then lets add the ib_device and port num to
>> rdma_netdev itself?
>
>Agree, at the end this rdma_netdev is intended for the drivers/infiniband
>and it is better to have this binding (rdma_netdev and ib_device) as early as possible.
>

I agree with adding ibdev and port num to rdma_netdev.

Niranjana

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Niranjana Vishwanathapura March 15, 2017, 4:42 p.m. UTC | #11
On Wed, Mar 15, 2017 at 08:27:28AM +0200, Leon Romanovsky wrote:
>On Tue, Mar 14, 2017 at 12:03:01PM -0700, Vishwanathapura, Niranjana wrote:
>> On Tue, Mar 14, 2017 at 03:25:01PM +0200, Erez Shitrit wrote:
>> > On Mon, Mar 13, 2017 at 10:01 PM, Jason Gunthorpe
>> > <jgunthorpe@obsidianresearch.com> wrote:
>> > > On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:
>> > >
>> > > > diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
>> > > > new file mode 100644
>> > > > index 000000000000..148a5529a559
>> > > > +++ b/include/rdma/ib_ipoib_accel_ops.h
>> > >
>> > > Both patches need a better naming scheme for this file..
>> > >
>> > > rn_opa_vnic.h
>> > > rn_ipoib.h
>> > >
>> > > Maybe?
>> >
>> > Can work for me.
>> >
>> > vnic?
>> >
>>
>> I do see why it is an issue for ipoib (we don't want two ipoib.h files). We
>> can add rn_ prefix for vnic, but probably opa_vnic.h is good enough.
>
>Just for the consistency, I would like to see the same "rn_" prefix
>for both files (ipoib and opa_vnic).
>
>Thanks

Fine with VNIC.

Niranjana

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Erez Shitrit March 15, 2017, 4:52 p.m. UTC | #12
On Wed, Mar 15, 2017 at 6:40 PM, Vishwanathapura, Niranjana
<niranjana.vishwanathapura@intel.com> wrote:
> On Wed, Mar 15, 2017 at 08:30:43AM +0200, Leon Romanovsky wrote:
>>
>> On Tue, Mar 14, 2017 at 10:11:49AM -0600, Jason Gunthorpe wrote:
>>>
>>> On Tue, Mar 14, 2017 at 12:01:09AM -0700, Vishwanathapura, Niranjana
>>> wrote:
>>> > On Mon, Mar 13, 2017 at 02:01:36PM -0600, Jason Gunthorpe wrote:
>>> > >>+   /* multicast */
>>> > >>+   int (*attach_mcast)(struct net_device *dev, struct ib_device
>>> > >> *hca,
>>> > >>+                       union ib_gid *gid, u16 lid, int set_qkey);
>>> > >>+   int (*detach_mcast)(struct net_device *dev, struct ib_device
>>> > >> *hca,
>>> > >>+                       union ib_gid *gid, u16 lid);
>>> > >
>>> > >It would make more sense to store the struct ib_device pointer in the
>>> > >struct rdma_netdev.
>>> > >
>>> >
>>> > Agree that it shouldn't be a function parameters.
>>> > For opa_vnic, I found it convenient to store ib_device pointer in
>>> > client and
>>> > device private structures as those will be available in most places
>>> > anyhow.
>>>
>>> If vnic uses it too, then lets add the ib_device and port num to
>>> rdma_netdev itself?
>>
>>
>> Agree, at the end this rdma_netdev is intended for the drivers/infiniband
>> and it is better to have this binding (rdma_netdev and ib_device) as early
>> as possible.
>>
>
> I agree with adding ibdev and port num to rdma_netdev.

Good. will do that.

>
> Niranjana
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Erez Shitrit March 16, 2017, 3:17 p.m. UTC | #13
On Mon, Mar 13, 2017 at 10:01 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Mon, Mar 13, 2017 at 08:31:15PM +0200, Erez Shitrit wrote:
>
>> diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
>> new file mode 100644
>> index 000000000000..148a5529a559
>> +++ b/include/rdma/ib_ipoib_accel_ops.h
>
> Both patches need a better naming scheme for this file..
>
> rn_opa_vnic.h
> rn_ipoib.h
>
> Maybe?
>
>> +struct rdma_netdev {
>> +     void *clnt_priv;
>> +
>> +     /* control functions */
>> +     void (*set_id)(struct net_device *netdev, int id);
>
>> +     /* IB resource allocation function, returns new UD QP */
>> +     int (*ib_dev_init)(struct net_device *dev, struct ib_device *hca,
>> +                        int *qp_num);
>
> Why can't some combination of alloc_rdma_netdev and ndo.open do this stuff?
>
>> +     void (*ib_dev_cleanup)(struct net_device *dev, struct ib_device *hca);
>
> Ditto
>
>> +     /* send packet */
>> +     void (*send)(struct net_device *dev, struct sk_buff *skb,
>> +                  struct ipoib_ah *address, u32 dqpn, u32 dqkey);
>
>> +     /* multicast */
>> +     int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
>> +                         union ib_gid *gid, u16 lid, int set_qkey);
>> +     int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
>> +                         union ib_gid *gid, u16 lid);
>
> It would make more sense to store the struct ib_device pointer in the
> struct rdma_netdev.
>
> Should 'lid' be 'mlid'?
>
>> +     int qp_num;
>
> This one probably belongs in ipoib_rdma_netdev
>
>> +     void *context;

The QP as a part of the HW resources, it is created in the low-level
driver, and used by the upper ipoib for few reasons, (for example the
mac of the ipoib interface includes from the qp_num)
Now, if we want to use the ndo's init/uninit i need to store member
variables (qp_num and context) in the rdma_netdev, that will let me
use the ndos as is.
rdma_netdev is the one who belongs to both layers, ipoib and the low-level.

>
> What is this? Why is something other than ipoib_priv or ipoib_dev_priv
> needed?
>
>
>>                                               struct ib_wq_attr *attr,
>>                                               u32 wq_attr_mask,
>>                                               struct ib_udata *udata);
>> +     struct ib_ipoib_accel_ops * (*get_ipoib_accel_ops)(struct ib_device *device);
>
> rebase error? Not sure how this compiles
>
> Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe March 16, 2017, 4:04 p.m. UTC | #14
On Thu, Mar 16, 2017 at 05:17:55PM +0200, Erez Shitrit wrote:
 
> The QP as a part of the HW resources, it is created in the low-level
> driver, and used by the upper ipoib for few reasons, (for example the
> mac of the ipoib interface includes from the qp_num)
> Now, if we want to use the ndo's init/uninit i need to store member
> variables (qp_num and context) in the rdma_netdev, that will let me
> use the ndos as is.
> rdma_netdev is the one who belongs to both layers, ipoib and the low-level.

IPOIB rdma_netdev's can be casted to ipoib_rdma_netdev, so it is where
ipoib specific cross-layer stuff should live

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/rdma/ib_ipoib_accel_ops.h b/include/rdma/ib_ipoib_accel_ops.h
new file mode 100644
index 000000000000..148a5529a559
--- /dev/null
+++ b/include/rdma/ib_ipoib_accel_ops.h
@@ -0,0 +1,59 @@ 
+/*
+ * Copyright (c) 2017 Mellanox Technologies Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(IB_IPOIB_ACCEL_OPS_H)
+#define IB_IPOIB_ACCEL_OPS_H
+
+#include <rdma/ib_verbs.h>
+
+/* ipoib rdma netdev's private data structure */
+struct ipoib_rdma_netdev {
+	struct rdma_netdev rn;  /* keep this first */
+	/* followed by device private data */
+	char *dev_priv[0];
+};
+
+static inline void *ipoib_priv(const struct net_device *dev)
+{
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	return rn->clnt_priv;
+}
+
+static inline void *ipoib_dev_priv(const struct net_device *dev)
+{
+	struct ipoib_rdma_netdev *ipoib_rn = netdev_priv(dev);
+
+	return ipoib_rn->dev_priv;
+}
+
+#endif /* IB_IPOIB_ACCEL_OPS_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 85b9034c8cfc..9b090efccdba 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1901,6 +1901,41 @@  struct ib_port_immutable {
 	u32                           max_mad_size;
 };
 
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+	RDMA_NETDEV_OPA_VNIC,
+	RDMA_NETDEV_IPOIB
+};
+
+struct ipoib_ah;
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+	void *clnt_priv;
+
+	/* control functions */
+	void (*set_id)(struct net_device *netdev, int id);
+	/* IB resource allocation function, returns new UD QP */
+	int (*ib_dev_init)(struct net_device *dev, struct ib_device *hca,
+			   int *qp_num);
+	void (*ib_dev_cleanup)(struct net_device *dev, struct ib_device *hca);
+
+	/* send packet */
+	void (*send)(struct net_device *dev, struct sk_buff *skb,
+		     struct ipoib_ah *address, u32 dqpn, u32 dqkey);
+
+	/* multicast */
+	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
+			    union ib_gid *gid, u16 lid, int set_qkey);
+	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
+			    union ib_gid *gid, u16 lid);
+	int qp_num;
+	void *context;
+};
+
 struct ib_device {
 	struct device                *dma_device;
 
@@ -2149,6 +2184,7 @@  struct ib_device {
 						struct ib_wq_attr *attr,
 						u32 wq_attr_mask,
 						struct ib_udata *udata);
+	struct ib_ipoib_accel_ops * (*get_ipoib_accel_ops)(struct ib_device *device);
 	struct ib_rwq_ind_table *  (*create_rwq_ind_table)(struct ib_device *device,
 							   struct ib_rwq_ind_table_init_attr *init_attr,
 							   struct ib_udata *udata);