Message ID | 550C2514.5070001@profitbricks.com (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
My apologies to those who are duplicated here. This did not make it to the mailing list due to mail configuration issues. From ira.weiny@intel.com Fri Mar 20 18:55:29 2015 > > Hi, folks > > I've done a draft (very rough draft...) according to my understanding on > Sean's proposal. > > The implementation is to allow device setup the management flags during > ib_query_port() (amso1100 as eg), and later we could use the flags to check > the capability. > > For new capability/proto, like OPA, device could setup new flag > IB_MGMT_PROTO_OPA during query_port() callback, and some helper like > rdma_mgmt_cap_opa() can be used for management branch. > > How do you think about this? This is not saving us anything... See below. > > Regards, > Michael Wang > > > > diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c > index d570030..ad1685e 100644 > --- a/drivers/infiniband/core/cma.c > +++ b/drivers/infiniband/core/cma.c > @@ -375,8 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private > *id_priv, > listen_id_priv->id.port_num) == dev_ll) { > cma_dev = listen_id_priv->cma_dev; > port = listen_id_priv->id.port_num; > - if (rdma_node_get_transport(cma_dev->device->node_type) == > RDMA_TRANSPORT_IB && > - rdma_port_get_link_layer(cma_dev->device, port) == > IB_LINK_LAYER_ETHERNET) > + if (rdma_mgmt_cap_iboe(cma_dev->device, port)) This is still indicating a specific technology "iboe" rather than the specific management capabilities the port has. Also this if statement does not seem to have anything to do with the management support. Here the iboe_gid is a different format and needs to be processed differently from the gid. > ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, > &found_port, NULL); > else > @@ -395,8 +394,7 @@ static int cma_acquire_dev(struct rdma_id_private > *id_priv, > listen_id_priv->id.port_num == port) > continue; > if (rdma_port_get_link_layer(cma_dev->device, port) == > dev_ll) { > - if (rdma_node_get_transport(cma_dev->device->node_type) > == RDMA_TRANSPORT_IB && > - rdma_port_get_link_layer(cma_dev->device, port) == > IB_LINK_LAYER_ETHERNET) > + if (rdma_mgmt_cap_iboe(cma_dev->device, port)) > ret = ib_find_cached_gid(cma_dev->device, > &iboe_gid, &found_port, NULL); > else > ret = ib_find_cached_gid(cma_dev->device, &gid, > &found_port, NULL); > diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c > index 74c30f4..0ae6b04 100644 > --- a/drivers/infiniband/core/mad.c > +++ b/drivers/infiniband/core/mad.c > @@ -2938,7 +2938,7 @@ static int ib_mad_port_open(struct ib_device *device, > init_mad_qp(port_priv, &port_priv->qp_info[1]); > > cq_size = mad_sendq_size + mad_recvq_size; > - has_smi = rdma_port_get_link_layer(device, port_num) == > IB_LINK_LAYER_INFINIBAND; > + has_smi = rdma_mgmt_cap_smi(device, port_num); > if (has_smi) > cq_size *= 2; > > @@ -3057,7 +3057,7 @@ static void ib_mad_init_device(struct ib_device > *device) > { > int start, end, i; > > - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) > + if (!rdma_mgmt_cap_ib(device)) > return; > > if (device->node_type == RDMA_NODE_IB_SWITCH) { > diff --git a/drivers/infiniband/core/verbs.c > b/drivers/infiniband/core/verbs.c > index f93eb8d..5ecf9c8 100644 > --- a/drivers/infiniband/core/verbs.c > +++ b/drivers/infiniband/core/verbs.c > @@ -146,6 +146,26 @@ enum rdma_link_layer > rdma_port_get_link_layer(struct ib_device *device, u8 port_ > } > EXPORT_SYMBOL(rdma_port_get_link_layer); > > +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num) > +{ > + int mgmt_flags = 0; > + enum rdma_transport_type tp = > + rdma_node_get_transport(device->node_type); > + enum rdma_link_layer ll = > + rdma_port_get_link_layer(device, port_num); This does not separate the management capabilities from the transport and link layer like Sean was advocating. This is just refactoring the current implementation with the use of additional flags. > + > + if (tp == RDMA_TRANSPORT_IB) { > + mgmt_flags |= IB_MGMT_PROTO_IB; > + if (ll == IB_LINK_LAYER_INFINIBAND) { > + mgmt_flags |= IB_MGMT_PROTO_SMI; > + mgmt_flags |= IB_MGMT_PROTO_IBOE; > + } > + } > + > + return mgmt_flags; > +} > +EXPORT_SYMBOL(rdma_port_default_mgmt_flags); > + > /* Protection domains */ > > struct ib_pd *ib_alloc_pd(struct ib_device *device) > diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c > b/drivers/infiniband/hw/amso1100/c2_provider.c > index bdf3507..04d005e 100644 > --- a/drivers/infiniband/hw/amso1100/c2_provider.c > +++ b/drivers/infiniband/hw/amso1100/c2_provider.c > @@ -96,6 +96,9 @@ static int c2_query_port(struct ib_device *ibdev, > props->active_width = 1; > props->active_speed = IB_SPEED_SDR; > > + /* Makeup flags here, by default or on your own */ > + props->mgmt_flags = rdma_port_default_mgmt_flags(ibdev, port); > + > return 0; > } > > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h > index 65994a1..d19c7c9 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -90,6 +90,13 @@ enum rdma_link_layer { > IB_LINK_LAYER_ETHERNET, > }; > > +enum rdma_mgmt_flag { > + IB_MGMT_PROTO_IB, > + IB_MGMT_PROTO_SMI, > + IB_MGMT_PROTO_IBOE, IB and IBoE are not management protocols. > + /* More Here*/ > +}; > + > enum ib_device_cap_flags { > IB_DEVICE_RESIZE_MAX_WR = 1, > IB_DEVICE_BAD_PKEY_CNTR = (1<<1), > @@ -352,6 +359,7 @@ struct ib_port_attr { > enum ib_mtu active_mtu; > int gid_tbl_len; > u32 port_cap_flags; > + u32 mgmt_flags; > u32 max_msg_sz; > u32 bad_pkey_cntr; > u32 qkey_viol_cntr; > @@ -1743,6 +1751,32 @@ int ib_query_port(struct ib_device *device, > enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, > u8 port_num); > > +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num); This should return u32. I think I would rather see your other patch go in to clean up the code a bit and work this issue separately. Ira > + > +static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num) > +{ > + struct ib_port_attr port_attr; > + memset(&port_attr, 0, sizeof port_attr); > + ib_query_port(device, port_num, &port_attr); > + return port_attr.mgmt_flags; > +} > + > +static inline int rdma_mgmt_cap_ib(struct ib_device *device) > +{ > + u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1; > + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB; > +} > + > +static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num) > +{ > + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI; > +} > + > +static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num) > +{ > + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE; > +} > + > int ib_query_gid(struct ib_device *device, > u8 port_num, int index, union ib_gid *gid); > > > > On 03/18/2015 12:36 AM, Hefty, Sean wrote: > >> But it makes sense to me to use management specific > >> fields/attributes/flags for the *management* pieces, rather than using the > >> link and/or transport layer protocols as a proxy. Management related code > >> should really branch based on that. > > As a proposal, we could add a new field to the kernel port attribute structure. The field would be a bitmask of management capabilities/protocols: > > > > IB_MGMT_PROTO_SM - supports IB SMPs > > IB_MGMT_PROTO_SA - supports IB SA MADs > > IB_MGMT_PROTO_GS - supports IB GSI MADs (e.g. CM, PM, ...) > > IB_MGMT_PROTO_OPA_SM - supports OPA SMPs (or whatever they are called) > > IB_MGMT_PROTO_OPA_GS - supports OPA GS MADs (or whatever is supported) > > > > If the *GS flags are not sufficient to distinguish between MADs supported over IB and RoCE, it can be further divided (i.e. CM, PM, BM, DM, etc.). > > > > This would provide a direct mapping of which management protocols are supported for a given port, rather than it being inferred by the link/transport fields, which should really be independent. It would also allow for simple checks by the core layer. > > > > If we want the code to be more generic, additional field(s) could be added, such as mad_size, so that any size of management datagram is supported. This would be used instead of inferring the size based on the supported protocol. > > > > - Sean > > N�����r��y���b�X��ǧv�^�)Þº{.n�+����{��Ùs�{ay�Ê?ÚT�,j��f���h���z��w������j:+v���w�j�m��������zZ+�����ݢj"��!tml= > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hello, Ira Thanks for the reply :-) On Sat, Mar 21, 2015 at 1:05 AM, ira.weiny <ira.weiny@intel.com> wrote: > My apologies to those who are duplicated here. This did not make it to the > mailing list due to mail configuration issues. > > From ira.weiny@intel.com Fri Mar 20 18:55:29 2015 > [snip] >> - if (rdma_node_get_transport(cma_dev->device->node_type) == >> RDMA_TRANSPORT_IB && >> - rdma_port_get_link_layer(cma_dev->device, port) == >> IB_LINK_LAYER_ETHERNET) >> + if (rdma_mgmt_cap_iboe(cma_dev->device, port)) > > This is still indicating a specific technology "iboe" rather than the specific > management capabilities the port has. > > Also this if statement does not seem to have anything to do with the management > support. Here the iboe_gid is a different format and needs to be processed > differently from the gid. Agree, the mgmt cap here is more close to concept 'feature' rather than 'protocol'. The purpose is to help branch the management, like when vendor declare his device support ibeo format, mad can get the hint by checking the port attribute then branch the management path, which currently is inferred from transport type and link layer type, not told by the vendor. > >> ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, >> &found_port, NULL); >> else [snip] >> + enum rdma_transport_type tp = >> + rdma_node_get_transport(device->node_type); >> + enum rdma_link_layer ll = >> + rdma_port_get_link_layer(device, port_num); > > This does not separate the management capabilities from the transport and link > layer like Sean was advocating. This is just refactoring the current > implementation with the use of additional flags. Yes, it's currently copy the logical we are using to branch the management, however, if the vendor could provide his own setup code here, we don't really need to infer anymore but following the indicator from vendor, this 'default' method is for transition. Sean, could you please give more hint or details on what you'd rather like to have? I think I got some misunderstanding on your suggestion... > >> + [snip] >> >> +enum rdma_mgmt_flag { >> + IB_MGMT_PROTO_IB, >> + IB_MGMT_PROTO_SMI, >> + IB_MGMT_PROTO_IBOE, > > IB and IBoE are not management protocols. May be it should be 'feature' rather than 'proto'? We have management branch for iboe although it's not really a protocol (or does it belong to any protocol specially?), also we have too much places to check if device got infiniband feature, especially at the beginning of initialization, I really want to have some way to simplify these check, or make them more like a formal mechanism. > [snip] >> >> +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num); > > This should return u32. Yeah.. that's why I call it a draft ;-) Actually the flags are even not defined in bit but integer :-P just try to introduce the idea see if it's close to Sean's proposal. > > I think I would rather see your other patch go in to clean up the code a bit > and work this issue separately. Agree, it seems like not a simple work, me too would rather to cleanup the code of inferring firstly, put them together into some public helper, waiting for adapt to some new mechanism. Sean, what's your opinion? Regards, Michael Wang > > Ira > >> + >> +static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num) >> +{ >> + struct ib_port_attr port_attr; >> + memset(&port_attr, 0, sizeof port_attr); >> + ib_query_port(device, port_num, &port_attr); >> + return port_attr.mgmt_flags; >> +} >> + >> +static inline int rdma_mgmt_cap_ib(struct ib_device *device) >> +{ >> + u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1; >> + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB; >> +} >> + >> +static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num) >> +{ >> + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI; >> +} >> + >> +static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num) >> +{ >> + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE; >> +} >> + >> int ib_query_gid(struct ib_device *device, >> u8 port_num, int index, union ib_gid *gid); >> >> >> >> On 03/18/2015 12:36 AM, Hefty, Sean wrote: >> >> But it makes sense to me to use management specific >> >> fields/attributes/flags for the *management* pieces, rather than using the >> >> link and/or transport layer protocols as a proxy. Management related code >> >> should really branch based on that. >> > As a proposal, we could add a new field to the kernel port attribute structure. The field would be a bitmask of management capabilities/protocols: >> > >> > IB_MGMT_PROTO_SM - supports IB SMPs >> > IB_MGMT_PROTO_SA - supports IB SA MADs >> > IB_MGMT_PROTO_GS - supports IB GSI MADs (e.g. CM, PM, ...) >> > IB_MGMT_PROTO_OPA_SM - supports OPA SMPs (or whatever they are called) >> > IB_MGMT_PROTO_OPA_GS - supports OPA GS MADs (or whatever is supported) >> > >> > If the *GS flags are not sufficient to distinguish between MADs supported over IB and RoCE, it can be further divided (i.e. CM, PM, BM, DM, etc.). >> > >> > This would provide a direct mapping of which management protocols are supported for a given port, rather than it being inferred by the link/transport fields, which should really be independent. It would also allow for simple checks by the core layer. >> > >> > If we want the code to be more generic, additional field(s) could be added, such as mad_size, so that any size of management datagram is supported. This would be used instead of inferring the size based on the supported protocol. >> > >> > - Sean >> > N�����r��y���b�X��ǧv�^�)Þº{.n�+����{��Ùs�{ay� Ê?ÚT�,j ��f���h���z� �w��� ���j:+v���w�j�m���� ����zZ+�����ݢj"��!tml= >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
PiBZZXMsIGl0J3MgY3VycmVudGx5IGNvcHkgdGhlIGxvZ2ljYWwgd2UgYXJlIHVzaW5nIHRvIGJy YW5jaCB0aGUNCj4gbWFuYWdlbWVudCwgaG93ZXZlciwgaWYgdGhlIHZlbmRvciBjb3VsZCBwcm92 aWRlIGhpcyBvd24gc2V0dXAgY29kZQ0KPiBoZXJlLCB3ZSBkb24ndCByZWFsbHkgbmVlZCB0byBp bmZlciBhbnltb3JlIGJ1dCBmb2xsb3dpbmcgdGhlDQo+IGluZGljYXRvciBmcm9tIHZlbmRvciwg dGhpcyAnZGVmYXVsdCcgbWV0aG9kIGlzIGZvciB0cmFuc2l0aW9uLg0KPiANCj4gU2VhbiwgY291 bGQgeW91IHBsZWFzZSBnaXZlIG1vcmUgaGludCBvciBkZXRhaWxzIG9uIHdoYXQgeW91J2QgcmF0 aGVyDQo+IGxpa2UgdG8gaGF2ZT8gSSB0aGluayBJIGdvdCBzb21lIG1pc3VuZGVyc3RhbmRpbmcg b24geW91cg0KPiBzdWdnZXN0aW9uLi4uDQoNClRvIHJlc3RhdGUgbXkgc3VnZ2VzdGluZywgSSB3 YXMgdGhpbmtpbmcgb2YgZGVmaW5pbmcgc29tZXRoaW5nIGxpa2UgdGhpczoNCg0KZW51bSB7DQoJ SUJfTUdNVF9QUk9UT19TTSA9ICgxIDw8IDApLCAgIC8qIHN1cHBvcnRzIElCIFNNUHMgKi8NCglJ Ql9NR01UX1BST1RPX1NBID0gKDEgPDwgMSksICAgLyogc3VwcG9ydHMgSUIgU0EgTUFEcyAqLw0K CUlCX01HTVRfUFJPVE9fR1MgPSAoMSA8PCAyKSwgICAvKiBzdXBwb3J0cyBJQiBHU0kgTUFEcyAo ZS5nLiBQTSwgLi4uKSAqLw0KCUlCX01HTVRfUFJPVE9fQ00gPSAoMSA8PCAzKSwgICAvKiBJQiBD TSBjYWxsZWQgb3V0IHNlcGFyYXRlbHkgKi8NCglJQl9NR01UX1BST1RPX0lXX0NNID0gKDEgPDwg NCksLyogaVdhcnAgQ00gKi8NCgkvKiBPUEEgY2FuIGRlZmluZSBuZXcgdmFsdWVzIGhlcmUgKi8N Cn07DQoNCnN0cnVjdCBpYl9wb3J0X2F0dHIgew0KCS4uLg0KCXUzMgltZ210X3Byb3RvOyAgLyog Yml0bWFzayBvZiBzdXBwb3J0ZWQgcHJvdG9jb2xzICovDQp9Ow0KDQpJIGFtIG5vdCBmYW1pbGlh ciBlbm91Z2ggd2l0aCBSb0NFIChJQm9FKSB0byBrbm93IG9mZiB0aGUgdG9wIG9mIG15IGhlYWQg aWYgdGhpcyBicmVha2Rvd24gd29ya3MgYXMgSSBkZWZpbmVkIGl0LCBvciBpZiBJQl9NR01UX1BS T1RPX0dTIG5lZWRzIHRvIGJlIHNlcGFyYXRlZCBpbnRvIG1vcmUgbWdtdCBjbGFzc2VzLiAgKEhh bCBvciBJcmEgbWlnaHQuKSAgSSBzZXBhcmF0ZWQgb3V0IHRoZSBDTSBjbGFzcywgYXMgdGhlIHJk bWEgY20gaGFzIGNoZWNrcyB3aGVyZSBpdCB3YW50cyB0byBkaXN0aW5ndWlzaCBiZXR3ZWVuIHdo aWNoIENNIHByb3RvY29sIHRvIGV4ZWN1dGUgKElCIG9yIGlXYXJwKS4NCg0KVGhpcyBjaGFuZ2Ug d291bGQgYmUgbGltaXRlZCB0byBtYW5hZ2VtZW50IGNoZWNrcyBvbmx5LiAgVGhlcmUgbWF5IHN0 aWxsIGJlIHBsYWNlcyBpbiB0aGUgY29kZSB3aGVyZSB0aGUgbGluayBhbmQgdHJhbnNwb3J0IGNo ZWNrcyB3b3VsZCBjb250aW51ZSB0byBleGlzdC4gIEFnYWluLCB0aGlzIGlzIGp1c3QgYSBzdWdn ZXN0aW9uLiAgV2l0aG91dCBhY3R1YWxseSBpbXBsZW1lbnRpbmcgdGhlIHBhdGNoLCBJIGRvbid0 IGtub3cgaWYgdGhpcyB3b3VsZCBzaW1wbGlmeSB0aGluZ3MuICBUaGUgY2hlY2tzIGluIHRoZSBy ZG1hIGNtLCBpbiBwYXJ0aWN1bGFyLCBhcmUgbWVzc3kuDQoNCi0gU2Vhbg0K -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 03/23/2015 05:31 PM, Hefty, Sean wrote: >> [snip] > To restate my suggesting, I was thinking of defining something like this: > > enum { > IB_MGMT_PROTO_SM = (1 << 0), /* supports IB SMPs */ > IB_MGMT_PROTO_SA = (1 << 1), /* supports IB SA MADs */ > IB_MGMT_PROTO_GS = (1 << 2), /* supports IB GSI MADs (e.g. PM, ...) */ > IB_MGMT_PROTO_CM = (1 << 3), /* IB CM called out separately */ > IB_MGMT_PROTO_IW_CM = (1 << 4),/* iWarp CM */ > /* OPA can define new values here */ > }; > > struct ib_port_attr { > ... > u32 mgmt_proto; /* bitmask of supported protocols */ > }; Thanks for the restate, Sean :) seems like your proposal is also to ask vendor setup 'mgmt_proto' during ib_query_port(), correct? > > I am not familiar enough with RoCE (IBoE) to know off the top of my head if this breakdown works as I defined it, or if IB_MGMT_PROTO_GS needs to be separated into more mgmt classes. (Hal or Ira might.) I separated out the CM class, as the rdma cm has checks where it wants to distinguish between which CM protocol to execute (IB or iWarp). Maybe we can apply this thought to CM part firstly? > > This change would be limited to management checks only. There may still be places in the code where the link and transport checks would continue to exist. Again, this is just a suggestion. Without actually implementing the patch, I don't know if this would simplify things. The checks in the rdma cm, in particular, are messy. I think it's time to make a formal patch now and discuss the problem in a separate thread, I'll still use the mechanism in draft and apply these flags, let's see if it satisfied peoples ;-) Regards, Michael Wang > - Sean -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi, Sean On 03/24/2015 01:49 PM, Michael Wang wrote: > On 03/23/2015 05:31 PM, Hefty, Sean wrote: >>> [snip] >> To restate my suggesting, I was thinking of defining something like >> this: >> >> enum { >> IB_MGMT_PROTO_SM = (1 << 0), /* supports IB SMPs */ >> IB_MGMT_PROTO_SA = (1 << 1), /* supports IB SA MADs */ >> IB_MGMT_PROTO_GS = (1 << 2), /* supports IB GSI MADs (e.g. PM, >> ...) */ >> IB_MGMT_PROTO_CM = (1 << 3), /* IB CM called out separately */ >> IB_MGMT_PROTO_IW_CM = (1 << 4),/* iWarp CM */ >> /* OPA can define new values here */ >> }; >> >> struct ib_port_attr { >> ... >> u32 mgmt_proto; /* bitmask of supported protocols */ >> }; > > Thanks for the restate, Sean :) seems like your proposal is also to > ask vendor > setup 'mgmt_proto' during ib_query_port(), correct? I think we got one problem here, if we rely on ib_query_port() to setup mgmt flags each time, the performance may suffered, since some implementation of query_port() is really expensive, like hardware communication (mlx4/5) and mutex protection (usnic)... And also I found that the current implementation doesn't match the idea very well, for example CM, I haven't found any scene to check whether a specified port support CM or not (correct me please), mostly only check the device rather than it's port, SM is checking the port, however since we already verified transport layer at beginning, just check link layer sounds not that bad... Thus I think using flags may not very helpful to the current implementation, may be use some helper to refine the code is more applicable? I'll send out the patch later with just helpers, we can discuss in that thread see if there is any better solutions ;-) Regards, Michael Wang > >> >> I am not familiar enough with RoCE (IBoE) to know off the top of my >> head if this breakdown works as I defined it, or if IB_MGMT_PROTO_GS >> needs to be separated into more mgmt classes. (Hal or Ira might.) I >> separated out the CM class, as the rdma cm has checks where it wants >> to distinguish between which CM protocol to execute (IB or iWarp). > > Maybe we can apply this thought to CM part firstly? >> >> This change would be limited to management checks only. There may >> still be places in the code where the link and transport checks would >> continue to exist. Again, this is just a suggestion. Without >> actually implementing the patch, I don't know if this would simplify >> things. The checks in the rdma cm, in particular, are messy. > I think it's time to make a formal patch now and discuss the problem in > a separate thread, I'll still use the mechanism in draft and apply these > flags, let's see if it satisfied peoples ;-) > > Regards, > Michael Wang > >> - Sean > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Mar 25, 2015 at 11:30:32AM +0100, Michael Wang wrote: > Hi, Sean > > On 03/24/2015 01:49 PM, Michael Wang wrote: > >On 03/23/2015 05:31 PM, Hefty, Sean wrote: > >>>[snip] > >>To restate my suggesting, I was thinking of defining something like > >>this: > >> > >>enum { > >> IB_MGMT_PROTO_SM = (1 << 0), /* supports IB SMPs */ > >> IB_MGMT_PROTO_SA = (1 << 1), /* supports IB SA MADs */ > >> IB_MGMT_PROTO_GS = (1 << 2), /* supports IB GSI MADs (e.g. PM, > >>...) */ > >> IB_MGMT_PROTO_CM = (1 << 3), /* IB CM called out separately */ > >> IB_MGMT_PROTO_IW_CM = (1 << 4),/* iWarp CM */ > >> /* OPA can define new values here */ > >>}; > >> > >>struct ib_port_attr { > >> ... > >> u32 mgmt_proto; /* bitmask of supported protocols */ > >>}; > > > >Thanks for the restate, Sean :) seems like your proposal is also to > >ask vendor > >setup 'mgmt_proto' during ib_query_port(), correct? > > I think we got one problem here, if we rely on ib_query_port() to setup > mgmt flags > each time, the performance may suffered, since some implementation of > query_port() is really expensive, like hardware communication (mlx4/5) and > mutex protection (usnic)... This is why my OPA patch series included a cache of the device attributes. https://www.mail-archive.com/linux-rdma%40vger.kernel.org/msg22827.html This was suggested by Or and incorporated rather than having the MAD stack issue ib_query_device. The same could be done for port attributes. That said I would like to see the next version of your patch series. We have had a lot of churn on that thread and I think you have a lot of things you were going to incorporate. :-D Thanks! Ira -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d570030..ad1685e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -375,8 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, listen_id_priv->id.port_num) == dev_ll) { cma_dev = listen_id_priv->cma_dev; port = listen_id_priv->id.port_num; - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + if (rdma_mgmt_cap_iboe(cma_dev->device, port)) ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); else @@ -395,8 +394,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, listen_id_priv->id.port_num == port) continue; if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { - if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && - rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + if (rdma_mgmt_cap_iboe(cma_dev->device, port)) ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); else ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 74c30f4..0ae6b04 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2938,7 +2938,7 @@ static int ib_mad_port_open(struct ib_device *device, init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; - has_smi = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND; + has_smi = rdma_mgmt_cap_smi(device, port_num); if (has_smi) cq_size *= 2; @@ -3057,7 +3057,7 @@ static void ib_mad_init_device(struct ib_device *device) { int start, end, i; - if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + if (!rdma_mgmt_cap_ib(device)) return; if (device->node_type == RDMA_NODE_IB_SWITCH) { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f93eb8d..5ecf9c8 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -146,6 +146,26 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_ } EXPORT_SYMBOL(rdma_port_get_link_layer); +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num) +{ + int mgmt_flags = 0; + enum rdma_transport_type tp = + rdma_node_get_transport(device->node_type); + enum rdma_link_layer ll = + rdma_port_get_link_layer(device, port_num); + + if (tp == RDMA_TRANSPORT_IB) { + mgmt_flags |= IB_MGMT_PROTO_IB; + if (ll == IB_LINK_LAYER_INFINIBAND) { + mgmt_flags |= IB_MGMT_PROTO_SMI; + mgmt_flags |= IB_MGMT_PROTO_IBOE; + } + } + + return mgmt_flags; +} +EXPORT_SYMBOL(rdma_port_default_mgmt_flags); + /* Protection domains */ struct ib_pd *ib_alloc_pd(struct ib_device *device) diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index bdf3507..04d005e 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -96,6 +96,9 @@ static int c2_query_port(struct ib_device *ibdev, props->active_width = 1; props->active_speed = IB_SPEED_SDR; + /* Makeup flags here, by default or on your own */ + props->mgmt_flags = rdma_port_default_mgmt_flags(ibdev, port); + return 0; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 65994a1..d19c7c9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -90,6 +90,13 @@ enum rdma_link_layer { IB_LINK_LAYER_ETHERNET, }; +enum rdma_mgmt_flag { + IB_MGMT_PROTO_IB, + IB_MGMT_PROTO_SMI, + IB_MGMT_PROTO_IBOE, + /* More Here*/ +}; + enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = 1, IB_DEVICE_BAD_PKEY_CNTR = (1<<1), @@ -352,6 +359,7 @@ struct ib_port_attr { enum ib_mtu active_mtu; int gid_tbl_len; u32 port_cap_flags; + u32 mgmt_flags; u32 max_msg_sz; u32 bad_pkey_cntr; u32 qkey_viol_cntr; @@ -1743,6 +1751,32 @@ int ib_query_port(struct ib_device *device, enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); +int rdma_port_default_mgmt_flags(struct ib_device *device, u8 port_num); + +static inline int rdma_mgmt_cap(struct ib_device *device, u8 port_num) +{ + struct ib_port_attr port_attr; + memset(&port_attr, 0, sizeof port_attr); + ib_query_port(device, port_num, &port_attr); + return port_attr.mgmt_flags; +} + +static inline int rdma_mgmt_cap_ib(struct ib_device *device) +{ + u8 port_num = device->node_type == RDMA_NODE_IB_SWITCH ? 0 : 1; + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IB; +} + +static inline int rdma_mgmt_cap_smi(struct ib_device *device, u8 port_num) +{ + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_SMI; +} + +static inline int rdma_mgmt_cap_iboe(struct ib_device *device, u8 port_num) +{ + return rdma_mgmt_cap(device, port_num) & IB_MGMT_PROTO_IBOE; +} + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid);