diff mbox

[v4,14/19] IB/core: Add IB_DEVICE_OPA_MAD_SUPPORT device cap flag

Message ID 550C2514.5070001@profitbricks.com (mailing list archive)
State Rejected
Headers show

Commit Message

Michael Wang March 20, 2015, 1:48 p.m. UTC
Hi, folks

I've done a draft (very rough draft...) according to my understanding on
Sean's proposal.

The implementation is to allow device setup the management flags during
ib_query_port() (amso1100 as eg), and later we could use the flags to check
the capability.

For new capability/proto, like OPA, device could setup new flag
IB_MGMT_PROTO_OPA during query_port() callback, and some helper like
rdma_mgmt_cap_opa() can be used for management branch.

How do you think about this?

Regards,
Michael Wang





On 03/18/2015 12:36 AM, Hefty, Sean wrote:
>> But it makes sense to me to use management specific
>> fields/attributes/flags for the *management* pieces, rather than using the
>> link and/or transport layer protocols as a proxy.  Management related code
>> should really branch based on that.
> As a proposal, we could add a new field to the kernel port attribute structure.  The field would be a bitmask of management capabilities/protocols:
>
> IB_MGMT_PROTO_SM - supports IB SMPs
> IB_MGMT_PROTO_SA - supports IB SA MADs
> IB_MGMT_PROTO_GS - supports IB GSI MADs (e.g. CM, PM, ...)
> IB_MGMT_PROTO_OPA_SM - supports OPA SMPs (or whatever they are called)
> IB_MGMT_PROTO_OPA_GS - supports OPA GS MADs (or whatever is supported)
>
> If the *GS flags are not sufficient to distinguish between MADs supported over IB and RoCE, it can be further divided (i.e. CM, PM, BM, DM, etc.).
>
> This would provide a direct mapping of which management protocols are supported for a given port, rather than it being inferred by the link/transport fields, which should really be independent.  It would also allow for simple checks by the core layer.
>
> If we want the code to be more generic, additional field(s) could be added, such as mad_size, so that any size of management datagram is supported.  This would be used instead of inferring the size based on the supported protocol.
>
> - Sean
> N�����r��y���b�X��ǧv�^�)Þº{.n�+����{��Ù?�{ay�Ê?Ú?�,j��f���h���z��w������j:+v���w�j�m��������zZ+�����ݢj"��!tml=

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Ira Weiny March 21, 2015, 12:05 a.m. UTC | #1
My apologies to those who are duplicated here.  This did not make it to the
mailing list due to mail configuration issues.

From ira.weiny@intel.com Fri Mar 20 18:55:29 2015

> 
> Hi, folks
> 
> I've done a draft (very rough draft...) according to my understanding on
> Sean's proposal.
> 
> The implementation is to allow device setup the management flags during
> ib_query_port() (amso1100 as eg), and later we could use the flags to check
> the capability.
> 
> For new capability/proto, like OPA, device could setup new flag
> IB_MGMT_PROTO_OPA during query_port() callback, and some helper like
> rdma_mgmt_cap_opa() can be used for management branch.
> 
> How do you think about this?

This is not saving us anything...  See below.

> 
> Regards,
> Michael Wang
> 
> 
> 
> diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
> index d570030..ad1685e 100644
> --- a/drivers/infiniband/core/cma.c
> +++ b/drivers/infiniband/core/cma.c
> @@ -375,8 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private 
> *id_priv,
>                        listen_id_priv->id.port_num) == dev_ll) {
>           cma_dev = listen_id_priv->cma_dev;
>           port = listen_id_priv->id.port_num;
> -        if (rdma_node_get_transport(cma_dev->device->node_type) == 
> RDMA_TRANSPORT_IB &&
> -            rdma_port_get_link_layer(cma_dev->device, port) == 
> IB_LINK_LAYER_ETHERNET)
> +        if (rdma_mgmt_cap_iboe(cma_dev->device, port))

This is still indicating a specific technology "iboe" rather than the specific
management capabilities the port has.

Also this if statement does not seem to have anything to do with the management
support.  Here the iboe_gid is a different format and needs to be processed
differently from the gid.

>               ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
>                            &found_port, NULL);
>           else
> @@ -395,8 +394,7 @@ static int cma_acquire_dev(struct rdma_id_private 
> *id_priv,
>                   listen_id_priv->id.port_num == port)
>                   continue;
>               if (rdma_port_get_link_layer(cma_dev->device, port) == 
> dev_ll) {
> -                if (rdma_node_get_transport(cma_dev->device->node_type) 
> == RDMA_TRANSPORT_IB &&
> -                    rdma_port_get_link_layer(cma_dev->device, port) == 
> IB_LINK_LAYER_ETHERNET)
> +                if (rdma_mgmt_cap_iboe(cma_dev->device, port))
>                       ret = ib_find_cached_gid(cma_dev->device, 
> &iboe_gid, &found_port, NULL);
>                   else
>                       ret = ib_find_cached_gid(cma_dev->device, &gid, 
> &found_port, NULL);
> diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
> index 74c30f4..0ae6b04 100644
> --- a/drivers/infiniband/core/mad.c
> +++ b/drivers/infiniband/core/mad.c
> @@ -2938,7 +2938,7 @@ static int ib_mad_port_open(struct ib_device *device,
>       init_mad_qp(port_priv, &port_priv->qp_info[1]);
> 
>       cq_size = mad_sendq_size + mad_recvq_size;
> -    has_smi = rdma_port_get_link_layer(device, port_num) == 
> IB_LINK_LAYER_INFINIBAND;
> +    has_smi = rdma_mgmt_cap_smi(device, port_num);
>       if (has_smi)
>           cq_size *= 2;
> 
> @@ -3057,7 +3057,7 @@ static void ib_mad_init_device(struct ib_device 
> *device)
>   {
>       int start, end, i;
> 
> -    if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
> +    if (!rdma_mgmt_cap_ib(device))
>           return;
> 
>       if (device->node_type == RDMA_NODE_IB_SWITCH) {
> diff --git a/drivers/infiniband/core/verbs.c 
> b/drivers/infiniband/core/verbs.c
> index f93eb8d..5ecf9c8 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -146,6 +146,26 @@ enum rdma_link_layer 
> rdma_port_get_link_layer(struct ib_device *device, u8 port_
>   }
>   EXPORT_SYMBOL(rdma_port_get_link_layer);
> 
> +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num)
> +{
> +    int mgmt_flags = 0;
> +    enum rdma_transport_type tp =
> +            rdma_node_get_transport(device->node_type);
> +    enum rdma_link_layer ll =
> +            rdma_port_get_link_layer(device, port_num);

This does not separate the management capabilities from the transport and link
layer like Sean was advocating.  This is just refactoring the current
implementation with the use of additional flags.

> +
> +    if (tp == RDMA_TRANSPORT_IB) {
> +        mgmt_flags |= IB_MGMT_PROTO_IB;
> +        if (ll == IB_LINK_LAYER_INFINIBAND) {
> +            mgmt_flags |= IB_MGMT_PROTO_SMI;
> +            mgmt_flags |= IB_MGMT_PROTO_IBOE;
> +        }
> +    }
> +
> +    return mgmt_flags;
> +}
> +EXPORT_SYMBOL(rdma_port_default_mgmt_flags);
> +
>   /* Protection domains */
> 
>   struct ib_pd *ib_alloc_pd(struct ib_device *device)
> diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c 
> b/drivers/infiniband/hw/amso1100/c2_provider.c
> index bdf3507..04d005e 100644
> --- a/drivers/infiniband/hw/amso1100/c2_provider.c
> +++ b/drivers/infiniband/hw/amso1100/c2_provider.c
> @@ -96,6 +96,9 @@ static int c2_query_port(struct ib_device *ibdev,
>       props->active_width = 1;
>       props->active_speed = IB_SPEED_SDR;
> 
> +    /* Makeup flags here, by default or on your own */
> +    props->mgmt_flags = rdma_port_default_mgmt_flags(ibdev, port);
> +
>       return 0;
>   }
> 
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 65994a1..d19c7c9 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -90,6 +90,13 @@ enum rdma_link_layer {
>       IB_LINK_LAYER_ETHERNET,
>   };
> 
> +enum rdma_mgmt_flag {
> +    IB_MGMT_PROTO_IB,
> +    IB_MGMT_PROTO_SMI,
> +    IB_MGMT_PROTO_IBOE,

IB and IBoE are not management protocols.

> +    /* More Here*/
> +};
> +
>   enum ib_device_cap_flags {
>       IB_DEVICE_RESIZE_MAX_WR        = 1,
>       IB_DEVICE_BAD_PKEY_CNTR        = (1<<1),
> @@ -352,6 +359,7 @@ struct ib_port_attr {
>       enum ib_mtu        active_mtu;
>       int            gid_tbl_len;
>       u32            port_cap_flags;
> +    u32            mgmt_flags;
>       u32            max_msg_sz;
>       u32            bad_pkey_cntr;
>       u32            qkey_viol_cntr;
> @@ -1743,6 +1751,32 @@ int ib_query_port(struct ib_device *device,
>   enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
>                              u8 port_num);
> 
> +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num);

This should return u32.

I think I would rather see your other patch go in to clean up the code a bit
and work this issue separately.

Ira

> +
> +static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num)
> +{
> +    struct ib_port_attr port_attr;
> +    memset(&port_attr, 0, sizeof port_attr);
> +    ib_query_port(device, port_num, &port_attr);
> +    return port_attr.mgmt_flags;
> +}
> +
> +static inline int rdma_mgmt_cap_ib(struct ib_device *device)
> +{
> +    u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1;
> +    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB;
> +}
> +
> +static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num)
> +{
> +    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI;
> +}
> +
> +static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num)
> +{
> +    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE;
> +}
> +
>   int ib_query_gid(struct ib_device *device,
>            u8 port_num, int index, union ib_gid *gid);
> 
> 
> 
> On 03/18/2015 12:36 AM, Hefty, Sean wrote:
> >> But it makes sense to me to use management specific
> >> fields/attributes/flags for the *management* pieces, rather than using the
> >> link and/or transport layer protocols as a proxy.  Management related code
> >> should really branch based on that.
> > As a proposal, we could add a new field to the kernel port attribute structure.  The field would be a bitmask of management capabilities/protocols:
> >
> > IB_MGMT_PROTO_SM - supports IB SMPs
> > IB_MGMT_PROTO_SA - supports IB SA MADs
> > IB_MGMT_PROTO_GS - supports IB GSI MADs (e.g. CM, PM, ...)
> > IB_MGMT_PROTO_OPA_SM - supports OPA SMPs (or whatever they are called)
> > IB_MGMT_PROTO_OPA_GS - supports OPA GS MADs (or whatever is supported)
> >
> > If the *GS flags are not sufficient to distinguish between MADs supported over IB and RoCE, it can be further divided (i.e. CM, PM, BM, DM, etc.).
> >
> > This would provide a direct mapping of which management protocols are supported for a given port, rather than it being inferred by the link/transport fields, which should really be independent.  It would also allow for simple checks by the core layer.
> >
> > If we want the code to be more generic, additional field(s) could be added, such as mad_size, so that any size of management datagram is supported.  This would be used instead of inferring the size based on the supported protocol.
> >
> > - Sean
> > N�����r��y���b�X��ǧv�^�)Þº{.n�+����{��Ùs�{ay�Ê?ÚT�,j��f���h���z��w������j:+v���w�j�m��������zZ+�����ݢj"��!tml=
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 




--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang March 21, 2015, 7:49 a.m. UTC | #2
Hello, Ira

Thanks for the reply :-)

On Sat, Mar 21, 2015 at 1:05 AM, ira.weiny <ira.weiny@intel.com> wrote:
> My apologies to those who are duplicated here.  This did not make it to the
> mailing list due to mail configuration issues.
>
> From ira.weiny@intel.com Fri Mar 20 18:55:29 2015
>
[snip]
>> -        if (rdma_node_get_transport(cma_dev->device->node_type) ==
>> RDMA_TRANSPORT_IB &&
>> -            rdma_port_get_link_layer(cma_dev->device, port) ==
>> IB_LINK_LAYER_ETHERNET)
>> +        if (rdma_mgmt_cap_iboe(cma_dev->device, port))
>
> This is still indicating a specific technology "iboe" rather than the specific
> management capabilities the port has.
>
> Also this if statement does not seem to have anything to do with the management
> support.  Here the iboe_gid is a different format and needs to be processed
> differently from the gid.

Agree, the mgmt cap here is more close to concept 'feature' rather
than 'protocol'.

The purpose is to help branch the management, like when vendor declare
his device support ibeo format, mad can get the hint by checking the
port attribute then branch the management path, which currently is
inferred from transport type and link layer type, not told by the
vendor.

>
>>               ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
>>                            &found_port, NULL);
>>           else
[snip]
>> +    enum rdma_transport_type tp =
>> +            rdma_node_get_transport(device->node_type);
>> +    enum rdma_link_layer ll =
>> +            rdma_port_get_link_layer(device, port_num);
>
> This does not separate the management capabilities from the transport and link
> layer like Sean was advocating.  This is just refactoring the current
> implementation with the use of additional flags.

Yes, it's currently copy the logical we are using to branch the
management, however, if the vendor could provide his own setup code
here, we don't really need to infer anymore but following the
indicator from vendor, this 'default' method is for transition.

Sean, could you please give more hint or details on what you'd rather
like to have? I think I got some misunderstanding on your
suggestion...

>
>> +
[snip]
>>
>> +enum rdma_mgmt_flag {
>> +    IB_MGMT_PROTO_IB,
>> +    IB_MGMT_PROTO_SMI,
>> +    IB_MGMT_PROTO_IBOE,
>
> IB and IBoE are not management protocols.

May be it should be 'feature' rather than 'proto'?

We have management branch for iboe although it's not really a protocol
(or does it belong to any protocol specially?), also we have too much
places to check if device got infiniband feature, especially at the
beginning of initialization, I really want to have some way to
simplify these check, or make them more like a formal mechanism.

>
[snip]
>>
>> +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num);
>
> This should return u32.

Yeah.. that's why I call it a draft ;-)

Actually the flags are even not defined in bit but integer :-P just
try to introduce the idea see if it's close to Sean's proposal.

>
> I think I would rather see your other patch go in to clean up the code a bit
> and work this issue separately.

Agree, it seems like not a simple work, me too would rather to cleanup
the code of inferring firstly, put them together into some public
helper, waiting for adapt to some new mechanism.

Sean, what's your opinion?

Regards,
Michael Wang

>
> Ira
>
>> +
>> +static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num)
>> +{
>> +    struct ib_port_attr port_attr;
>> +    memset(&port_attr, 0, sizeof port_attr);
>> +    ib_query_port(device, port_num, &port_attr);
>> +    return port_attr.mgmt_flags;
>> +}
>> +
>> +static inline int rdma_mgmt_cap_ib(struct ib_device *device)
>> +{
>> +    u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1;
>> +    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB;
>> +}
>> +
>> +static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num)
>> +{
>> +    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI;
>> +}
>> +
>> +static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num)
>> +{
>> +    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE;
>> +}
>> +
>>   int ib_query_gid(struct ib_device *device,
>>            u8 port_num, int index, union ib_gid *gid);
>>
>>
>>
>> On 03/18/2015 12:36 AM, Hefty, Sean wrote:
>> >> But it makes sense to me to use management specific
>> >> fields/attributes/flags for the *management* pieces, rather than using the
>> >> link and/or transport layer protocols as a proxy.  Management related code
>> >> should really branch based on that.
>> > As a proposal, we could add a new field to the kernel port attribute structure.  The field would be a bitmask of management capabilities/protocols:
>> >
>> > IB_MGMT_PROTO_SM - supports IB SMPs
>> > IB_MGMT_PROTO_SA - supports IB SA MADs
>> > IB_MGMT_PROTO_GS - supports IB GSI MADs (e.g. CM, PM, ...)
>> > IB_MGMT_PROTO_OPA_SM - supports OPA SMPs (or whatever they are called)
>> > IB_MGMT_PROTO_OPA_GS - supports OPA GS MADs (or whatever is supported)
>> >
>> > If the *GS flags are not sufficient to distinguish between MADs supported over IB and RoCE, it can be further divided (i.e. CM, PM, BM, DM, etc.).
>> >
>> > This would provide a direct mapping of which management protocols are supported for a given port, rather than it being inferred by the link/transport fields, which should really be independent.  It would also allow for simple checks by the core layer.
>> >
>> > If we want the code to be more generic, additional field(s) could be added, such as mad_size, so that any size of management datagram is supported.  This would be used instead of inferring the size based on the supported protocol.
>> >
>> > - Sean
>> > N�����r��y���b�X��ǧv�^�)Þº{.n�+����{��Ùs�{ay� Ê?ÚT�,j ��f���h���z� �w��� ���j:+v���w�j�m���� ����zZ+�����ݢj"��!tml=
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hefty, Sean March 23, 2015, 4:31 p.m. UTC | #3
PiBZZXMsIGl0J3MgY3VycmVudGx5IGNvcHkgdGhlIGxvZ2ljYWwgd2UgYXJlIHVzaW5nIHRvIGJy
YW5jaCB0aGUNCj4gbWFuYWdlbWVudCwgaG93ZXZlciwgaWYgdGhlIHZlbmRvciBjb3VsZCBwcm92
aWRlIGhpcyBvd24gc2V0dXAgY29kZQ0KPiBoZXJlLCB3ZSBkb24ndCByZWFsbHkgbmVlZCB0byBp
bmZlciBhbnltb3JlIGJ1dCBmb2xsb3dpbmcgdGhlDQo+IGluZGljYXRvciBmcm9tIHZlbmRvciwg
dGhpcyAnZGVmYXVsdCcgbWV0aG9kIGlzIGZvciB0cmFuc2l0aW9uLg0KPiANCj4gU2VhbiwgY291
bGQgeW91IHBsZWFzZSBnaXZlIG1vcmUgaGludCBvciBkZXRhaWxzIG9uIHdoYXQgeW91J2QgcmF0
aGVyDQo+IGxpa2UgdG8gaGF2ZT8gSSB0aGluayBJIGdvdCBzb21lIG1pc3VuZGVyc3RhbmRpbmcg
b24geW91cg0KPiBzdWdnZXN0aW9uLi4uDQoNClRvIHJlc3RhdGUgbXkgc3VnZ2VzdGluZywgSSB3
YXMgdGhpbmtpbmcgb2YgZGVmaW5pbmcgc29tZXRoaW5nIGxpa2UgdGhpczoNCg0KZW51bSB7DQoJ
SUJfTUdNVF9QUk9UT19TTSA9ICgxIDw8IDApLCAgIC8qIHN1cHBvcnRzIElCIFNNUHMgKi8NCglJ
Ql9NR01UX1BST1RPX1NBID0gKDEgPDwgMSksICAgLyogc3VwcG9ydHMgSUIgU0EgTUFEcyAqLw0K
CUlCX01HTVRfUFJPVE9fR1MgPSAoMSA8PCAyKSwgICAvKiBzdXBwb3J0cyBJQiBHU0kgTUFEcyAo
ZS5nLiBQTSwgLi4uKSAqLw0KCUlCX01HTVRfUFJPVE9fQ00gPSAoMSA8PCAzKSwgICAvKiBJQiBD
TSBjYWxsZWQgb3V0IHNlcGFyYXRlbHkgKi8NCglJQl9NR01UX1BST1RPX0lXX0NNID0gKDEgPDwg
NCksLyogaVdhcnAgQ00gKi8NCgkvKiBPUEEgY2FuIGRlZmluZSBuZXcgdmFsdWVzIGhlcmUgKi8N
Cn07DQoNCnN0cnVjdCBpYl9wb3J0X2F0dHIgew0KCS4uLg0KCXUzMgltZ210X3Byb3RvOyAgLyog
Yml0bWFzayBvZiBzdXBwb3J0ZWQgcHJvdG9jb2xzICovDQp9Ow0KDQpJIGFtIG5vdCBmYW1pbGlh
ciBlbm91Z2ggd2l0aCBSb0NFIChJQm9FKSB0byBrbm93IG9mZiB0aGUgdG9wIG9mIG15IGhlYWQg
aWYgdGhpcyBicmVha2Rvd24gd29ya3MgYXMgSSBkZWZpbmVkIGl0LCBvciBpZiBJQl9NR01UX1BS
T1RPX0dTIG5lZWRzIHRvIGJlIHNlcGFyYXRlZCBpbnRvIG1vcmUgbWdtdCBjbGFzc2VzLiAgKEhh
bCBvciBJcmEgbWlnaHQuKSAgSSBzZXBhcmF0ZWQgb3V0IHRoZSBDTSBjbGFzcywgYXMgdGhlIHJk
bWEgY20gaGFzIGNoZWNrcyB3aGVyZSBpdCB3YW50cyB0byBkaXN0aW5ndWlzaCBiZXR3ZWVuIHdo
aWNoIENNIHByb3RvY29sIHRvIGV4ZWN1dGUgKElCIG9yIGlXYXJwKS4NCg0KVGhpcyBjaGFuZ2Ug
d291bGQgYmUgbGltaXRlZCB0byBtYW5hZ2VtZW50IGNoZWNrcyBvbmx5LiAgVGhlcmUgbWF5IHN0
aWxsIGJlIHBsYWNlcyBpbiB0aGUgY29kZSB3aGVyZSB0aGUgbGluayBhbmQgdHJhbnNwb3J0IGNo
ZWNrcyB3b3VsZCBjb250aW51ZSB0byBleGlzdC4gIEFnYWluLCB0aGlzIGlzIGp1c3QgYSBzdWdn
ZXN0aW9uLiAgV2l0aG91dCBhY3R1YWxseSBpbXBsZW1lbnRpbmcgdGhlIHBhdGNoLCBJIGRvbid0
IGtub3cgaWYgdGhpcyB3b3VsZCBzaW1wbGlmeSB0aGluZ3MuICBUaGUgY2hlY2tzIGluIHRoZSBy
ZG1hIGNtLCBpbiBwYXJ0aWN1bGFyLCBhcmUgbWVzc3kuDQoNCi0gU2Vhbg0K
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang March 24, 2015, 12:49 p.m. UTC | #4
On 03/23/2015 05:31 PM, Hefty, Sean wrote:
>> [snip]
> To restate my suggesting, I was thinking of defining something like this:
>
> enum {
> 	IB_MGMT_PROTO_SM = (1 << 0),   /* supports IB SMPs */
> 	IB_MGMT_PROTO_SA = (1 << 1),   /* supports IB SA MADs */
> 	IB_MGMT_PROTO_GS = (1 << 2),   /* supports IB GSI MADs (e.g. PM, ...) */
> 	IB_MGMT_PROTO_CM = (1 << 3),   /* IB CM called out separately */
> 	IB_MGMT_PROTO_IW_CM = (1 << 4),/* iWarp CM */
> 	/* OPA can define new values here */
> };
>
> struct ib_port_attr {
> 	...
> 	u32	mgmt_proto;  /* bitmask of supported protocols */
> };

Thanks for the restate, Sean :) seems like your proposal is also to ask 
vendor
setup 'mgmt_proto' during ib_query_port(), correct?

>
> I am not familiar enough with RoCE (IBoE) to know off the top of my head if this breakdown works as I defined it, or if IB_MGMT_PROTO_GS needs to be separated into more mgmt classes.  (Hal or Ira might.)  I separated out the CM class, as the rdma cm has checks where it wants to distinguish between which CM protocol to execute (IB or iWarp).

Maybe we can apply this thought to CM part firstly?
>
> This change would be limited to management checks only.  There may still be places in the code where the link and transport checks would continue to exist.  Again, this is just a suggestion.  Without actually implementing the patch, I don't know if this would simplify things.  The checks in the rdma cm, in particular, are messy.
I think it's time to make a formal patch now and discuss the problem in
a separate thread, I'll still use the mechanism in draft and apply these
flags, let's see if it satisfied peoples ;-)

Regards,
Michael Wang

> - Sean

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang March 25, 2015, 10:30 a.m. UTC | #5
Hi, Sean

On 03/24/2015 01:49 PM, Michael Wang wrote:
> On 03/23/2015 05:31 PM, Hefty, Sean wrote:
>>> [snip]
>> To restate my suggesting, I was thinking of defining something like 
>> this:
>>
>> enum {
>>     IB_MGMT_PROTO_SM = (1 << 0),   /* supports IB SMPs */
>>     IB_MGMT_PROTO_SA = (1 << 1),   /* supports IB SA MADs */
>>     IB_MGMT_PROTO_GS = (1 << 2),   /* supports IB GSI MADs (e.g. PM, 
>> ...) */
>>     IB_MGMT_PROTO_CM = (1 << 3),   /* IB CM called out separately */
>>     IB_MGMT_PROTO_IW_CM = (1 << 4),/* iWarp CM */
>>     /* OPA can define new values here */
>> };
>>
>> struct ib_port_attr {
>>     ...
>>     u32    mgmt_proto;  /* bitmask of supported protocols */
>> };
>
> Thanks for the restate, Sean :) seems like your proposal is also to 
> ask vendor
> setup 'mgmt_proto' during ib_query_port(), correct?

I think we got one problem here, if we rely on ib_query_port() to setup 
mgmt flags
each time, the performance may suffered, since some implementation of
query_port() is really expensive, like hardware communication (mlx4/5) and
mutex protection (usnic)...

And also I found that the current implementation doesn't match the idea 
very well,
for example CM, I haven't found any scene to check whether a specified 
port support
CM or not (correct me please), mostly only check the device rather than 
it's port,
SM is checking the port, however since we already verified transport 
layer at beginning,
just check link layer sounds not that bad...

Thus I think using flags may not very helpful to the current 
implementation, may
be use some helper to refine the code is more applicable?

I'll send out the patch later with just helpers, we can discuss in that 
thread see if
there is any better solutions ;-)

Regards,
Michael Wang

>
>>
>> I am not familiar enough with RoCE (IBoE) to know off the top of my 
>> head if this breakdown works as I defined it, or if IB_MGMT_PROTO_GS 
>> needs to be separated into more mgmt classes. (Hal or Ira might.)  I 
>> separated out the CM class, as the rdma cm has checks where it wants 
>> to distinguish between which CM protocol to execute (IB or iWarp).
>
> Maybe we can apply this thought to CM part firstly?
>>
>> This change would be limited to management checks only.  There may 
>> still be places in the code where the link and transport checks would 
>> continue to exist.  Again, this is just a suggestion.  Without 
>> actually implementing the patch, I don't know if this would simplify 
>> things.  The checks in the rdma cm, in particular, are messy.
> I think it's time to make a formal patch now and discuss the problem in
> a separate thread, I'll still use the mechanism in draft and apply these
> flags, let's see if it satisfied peoples ;-)
>
> Regards,
> Michael Wang
>
>> - Sean
>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ira Weiny April 2, 2015, 10:45 p.m. UTC | #6
On Wed, Mar 25, 2015 at 11:30:32AM +0100, Michael Wang wrote:
> Hi, Sean
> 
> On 03/24/2015 01:49 PM, Michael Wang wrote:
> >On 03/23/2015 05:31 PM, Hefty, Sean wrote:
> >>>[snip]
> >>To restate my suggesting, I was thinking of defining something like 
> >>this:
> >>
> >>enum {
> >>    IB_MGMT_PROTO_SM = (1 << 0),   /* supports IB SMPs */
> >>    IB_MGMT_PROTO_SA = (1 << 1),   /* supports IB SA MADs */
> >>    IB_MGMT_PROTO_GS = (1 << 2),   /* supports IB GSI MADs (e.g. PM, 
> >>...) */
> >>    IB_MGMT_PROTO_CM = (1 << 3),   /* IB CM called out separately */
> >>    IB_MGMT_PROTO_IW_CM = (1 << 4),/* iWarp CM */
> >>    /* OPA can define new values here */
> >>};
> >>
> >>struct ib_port_attr {
> >>    ...
> >>    u32    mgmt_proto;  /* bitmask of supported protocols */
> >>};
> >
> >Thanks for the restate, Sean :) seems like your proposal is also to 
> >ask vendor
> >setup 'mgmt_proto' during ib_query_port(), correct?
> 
> I think we got one problem here, if we rely on ib_query_port() to setup 
> mgmt flags
> each time, the performance may suffered, since some implementation of
> query_port() is really expensive, like hardware communication (mlx4/5) and
> mutex protection (usnic)...

This is why my OPA patch series included a cache of the device attributes.

https://www.mail-archive.com/linux-rdma%40vger.kernel.org/msg22827.html

This was suggested by Or and incorporated rather than having the MAD stack
issue ib_query_device.  The same could be done for port attributes.

That said I would like to see the next version of your patch series.  We have
had a lot of churn on that thread and I think you have a lot of things you were
going to incorporate.  :-D

Thanks!
Ira

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d570030..ad1685e 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -375,8 +375,7 @@  static int cma_acquire_dev(struct rdma_id_private 
*id_priv,
                       listen_id_priv->id.port_num) == dev_ll) {
          cma_dev = listen_id_priv->cma_dev;
          port = listen_id_priv->id.port_num;
-        if (rdma_node_get_transport(cma_dev->device->node_type) == 
RDMA_TRANSPORT_IB &&
-            rdma_port_get_link_layer(cma_dev->device, port) == 
IB_LINK_LAYER_ETHERNET)
+        if (rdma_mgmt_cap_iboe(cma_dev->device, port))
              ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
                           &found_port, NULL);
          else
@@ -395,8 +394,7 @@  static int cma_acquire_dev(struct rdma_id_private 
*id_priv,
                  listen_id_priv->id.port_num == port)
                  continue;
              if (rdma_port_get_link_layer(cma_dev->device, port) == 
dev_ll) {
-                if (rdma_node_get_transport(cma_dev->device->node_type) 
== RDMA_TRANSPORT_IB &&
-                    rdma_port_get_link_layer(cma_dev->device, port) == 
IB_LINK_LAYER_ETHERNET)
+                if (rdma_mgmt_cap_iboe(cma_dev->device, port))
                      ret = ib_find_cached_gid(cma_dev->device, 
&iboe_gid, &found_port, NULL);
                  else
                      ret = ib_find_cached_gid(cma_dev->device, &gid, 
&found_port, NULL);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 74c30f4..0ae6b04 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2938,7 +2938,7 @@  static int ib_mad_port_open(struct ib_device *device,
      init_mad_qp(port_priv, &port_priv->qp_info[1]);

      cq_size = mad_sendq_size + mad_recvq_size;
-    has_smi = rdma_port_get_link_layer(device, port_num) == 
IB_LINK_LAYER_INFINIBAND;
+    has_smi = rdma_mgmt_cap_smi(device, port_num);
      if (has_smi)
          cq_size *= 2;

@@ -3057,7 +3057,7 @@  static void ib_mad_init_device(struct ib_device 
*device)
  {
      int start, end, i;

-    if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+    if (!rdma_mgmt_cap_ib(device))
          return;

      if (device->node_type == RDMA_NODE_IB_SWITCH) {
diff --git a/drivers/infiniband/core/verbs.c 
b/drivers/infiniband/core/verbs.c
index f93eb8d..5ecf9c8 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -146,6 +146,26 @@  enum rdma_link_layer 
rdma_port_get_link_layer(struct ib_device *device, u8 port_
  }
  EXPORT_SYMBOL(rdma_port_get_link_layer);

+int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num)
+{
+    int mgmt_flags = 0;
+    enum rdma_transport_type tp =
+            rdma_node_get_transport(device->node_type);
+    enum rdma_link_layer ll =
+            rdma_port_get_link_layer(device, port_num);
+
+    if (tp == RDMA_TRANSPORT_IB) {
+        mgmt_flags |= IB_MGMT_PROTO_IB;
+        if (ll == IB_LINK_LAYER_INFINIBAND) {
+            mgmt_flags |= IB_MGMT_PROTO_SMI;
+            mgmt_flags |= IB_MGMT_PROTO_IBOE;
+        }
+    }
+
+    return mgmt_flags;
+}
+EXPORT_SYMBOL(rdma_port_default_mgmt_flags);
+
  /* Protection domains */

  struct ib_pd *ib_alloc_pd(struct ib_device *device)
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c 
b/drivers/infiniband/hw/amso1100/c2_provider.c
index bdf3507..04d005e 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -96,6 +96,9 @@  static int c2_query_port(struct ib_device *ibdev,
      props->active_width = 1;
      props->active_speed = IB_SPEED_SDR;

+    /* Makeup flags here, by default or on your own */
+    props->mgmt_flags = rdma_port_default_mgmt_flags(ibdev, port);
+
      return 0;
  }

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 65994a1..d19c7c9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -90,6 +90,13 @@  enum rdma_link_layer {
      IB_LINK_LAYER_ETHERNET,
  };

+enum rdma_mgmt_flag {
+    IB_MGMT_PROTO_IB,
+    IB_MGMT_PROTO_SMI,
+    IB_MGMT_PROTO_IBOE,
+    /* More Here*/
+};
+
  enum ib_device_cap_flags {
      IB_DEVICE_RESIZE_MAX_WR        = 1,
      IB_DEVICE_BAD_PKEY_CNTR        = (1<<1),
@@ -352,6 +359,7 @@  struct ib_port_attr {
      enum ib_mtu        active_mtu;
      int            gid_tbl_len;
      u32            port_cap_flags;
+    u32            mgmt_flags;
      u32            max_msg_sz;
      u32            bad_pkey_cntr;
      u32            qkey_viol_cntr;
@@ -1743,6 +1751,32 @@  int ib_query_port(struct ib_device *device,
  enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
                             u8 port_num);

+int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num);
+
+static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num)
+{
+    struct ib_port_attr port_attr;
+    memset(&port_attr, 0, sizeof port_attr);
+    ib_query_port(device, port_num, &port_attr);
+    return port_attr.mgmt_flags;
+}
+
+static inline int rdma_mgmt_cap_ib(struct ib_device *device)
+{
+    u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1;
+    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB;
+}
+
+static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num)
+{
+    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI;
+}
+
+static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num)
+{
+    return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE;
+}
+
  int ib_query_gid(struct ib_device *device,
           u8 port_num, int index, union ib_gid *gid);