Message ID | 20180523153724.2404.63310.stgit@scvm10.sc.intel.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
> -----Original Message----- > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma- > owner@vger.kernel.org] On Behalf Of Dennis Dalessandro > Sent: Wednesday, May 23, 2018 10:37 AM > To: jgg@ziepe.ca; dledford@redhat.com > Cc: linux-rdma@vger.kernel.org; Mike Marciniszyn > <mike.marciniszyn@intel.com>; Ira Weiny <ira.weiny@intel.com>; Sadanand > Warrier <sadanand.warrier@intel.com> > Subject: [PATCH for-next] IB/ipoib: Increase IPOIB Datagram mode MTU's upper > limit > > From: Sadanand Warrier <sadanand.warrier@intel.com> > > Currently the IPOIB ud mtu is restricted to 4Kbytes. Remove this limitation so > that the IPOIB module can potentially use an MTU that is bounded by the MTU > of the underlying device. > > Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com> > Reviewed-by: Ira Weiny <ira.weiny@intel.com> > Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> > Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com> > Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> > --- > drivers/infiniband/hw/hfi1/hfi.h | 6 +++ > drivers/infiniband/hw/hfi1/qp.c | 21 ++--------- > drivers/infiniband/hw/hfi1/verbs.c | 9 +++-- > drivers/infiniband/hw/qib/qib_qp.c | 2 + > drivers/infiniband/hw/qib/qib_verbs.c | 2 + > drivers/infiniband/sw/rdmavt/qp.c | 2 + > drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 + > drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 8 +++- > include/rdma/ib_verbs.h | 48 ++++++++++++++++++++++++ > include/rdma/opa_port_info.h | 8 ---- > include/rdma/rdma_vt.h | 2 + > 11 files changed, 73 insertions(+), 37 deletions(-) > > diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h > index dd84238..de1b02a 100644 > --- a/drivers/infiniband/hw/hfi1/hfi.h > +++ b/drivers/infiniband/hw/hfi1/hfi.h > @@ -1721,6 +1721,12 @@ static inline int valid_ib_mtu(unsigned int mtu) > mtu == 4096; > } > > +static inline int valid_opa_mtu(unsigned int mtu) { > + return valid_ib_mtu(mtu) || mtu == 8192 || > + mtu == 10240; > +} > + > static inline int valid_opa_max_mtu(unsigned int mtu) { > return mtu >= 2048 && > diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c > index 1697d96..977bacd 100644 > --- a/drivers/infiniband/hw/hfi1/qp.c > +++ b/drivers/infiniband/hw/hfi1/qp.c > @@ -168,15 +168,6 @@ static void flush_iowait(struct rvt_qp *qp) > write_sequnlock_irqrestore(lock, flags); } > > -static inline int opa_mtu_enum_to_int(int mtu) -{ > - switch (mtu) { > - case OPA_MTU_8192: return 8192; > - case OPA_MTU_10240: return 10240; > - default: return -1; > - } > -} > - > /** > * This function is what we would push to the core layer if we wanted to be a > * "first class citizen". Instead we hide this here and rely on Verbs ULPs @@ - > 184,15 +175,9 @@ static inline int opa_mtu_enum_to_int(int mtu) > */ > static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu > mtu) { > - int val; > - > - /* Constraining 10KB packets to 8KB packets */ > if (mtu == (enum ib_mtu)OPA_MTU_10240) > mtu = OPA_MTU_8192; > - val = opa_mtu_enum_to_int((int)mtu); > - if (val > 0) > - return val; > - return ib_mtu_enum_to_int(mtu); > + return opa_mtu_enum_to_int((enum opa_mtu)mtu); > } > > int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, @@ - > 308,13 +293,13 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, > break; > case IB_QPT_SMI: > ah = ibah_to_rvtah(wqe->ud_wr.ah); > - if (wqe->length > (1 << ah->log_pmtu)) > + if (wqe->length > ah->pmtu) > return -EINVAL; > break; > case IB_QPT_GSI: > case IB_QPT_UD: > ah = ibah_to_rvtah(wqe->ud_wr.ah); > - if (wqe->length > (1 << ah->log_pmtu)) > + if (wqe->length > ah->pmtu) > return -EINVAL; > if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) > return -EINVAL; > diff --git a/drivers/infiniband/hw/hfi1/verbs.c > b/drivers/infiniband/hw/hfi1/verbs.c > index fc2e44c..ab67f8d 100644 > --- a/drivers/infiniband/hw/hfi1/verbs.c > +++ b/drivers/infiniband/hw/hfi1/verbs.c > @@ -1481,9 +1481,10 @@ static int query_port(struct rvt_dev_info *rdi, u8 > port_num, > * from the Path Records to us will get the new 8k MTU. Those that > * attempt to process the MTU enum may fail in various ways. > */ > - props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ? > - 4096 : hfi1_max_mtu), IB_MTU_4096); > - props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : > + props->max_mtu = mtu_to_enum((!valid_opa_mtu(hfi1_max_mtu) ? > + 4096 : hfi1_max_mtu), > + IB_MTU_4096); > + props->active_mtu = !valid_opa_mtu(ppd->ibmtu) ? props->max_mtu : > mtu_to_enum(ppd->ibmtu, IB_MTU_4096); > > /* > @@ -1617,7 +1618,7 @@ static void hfi1_notify_new_ah(struct ib_device > *ibdev, > dd = dd_from_ppd(ppd); > ah->vl = sc_to_vlt(dd, sc5); > if (ah->vl < num_vls || ah->vl == 15) > - ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); > + ah->pmtu = dd->vld[ah->vl].mtu; > } > > /** > diff --git a/drivers/infiniband/hw/qib/qib_qp.c > b/drivers/infiniband/hw/qib/qib_qp.c > index 344e401..59f0093 100644 > --- a/drivers/infiniband/hw/qib/qib_qp.c > +++ b/drivers/infiniband/hw/qib/qib_qp.c > @@ -402,7 +402,7 @@ int qib_check_send_wqe(struct rvt_qp *qp, > case IB_QPT_GSI: > case IB_QPT_UD: > ah = ibah_to_rvtah(wqe->ud_wr.ah); > - if (wqe->length > (1 << ah->log_pmtu)) > + if (wqe->length > ah->pmtu) > return -EINVAL; > /* progress hint */ > ret = 1; > diff --git a/drivers/infiniband/hw/qib/qib_verbs.c > b/drivers/infiniband/hw/qib/qib_verbs.c > index 14b4057..c5d0ae4 100644 > --- a/drivers/infiniband/hw/qib/qib_verbs.c > +++ b/drivers/infiniband/hw/qib/qib_verbs.c > @@ -1367,7 +1367,7 @@ static void qib_notify_new_ah(struct ib_device > *ibdev, > ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); > ppd = ppd_from_ibp(ibp); > ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)]; > - ah->log_pmtu = ilog2(ppd->ibmtu); > + ah->pmtu = ppd->ibmtu; > } > > struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) diff --git > a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c > index 6e9a351..00ddf97 100644 > --- a/drivers/infiniband/sw/rdmavt/qp.c > +++ b/drivers/infiniband/sw/rdmavt/qp.c > @@ -1835,7 +1835,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, > qp->ibqp.qp_type != IB_QPT_RC) { > struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); > > - log_pmtu = ah->log_pmtu; > + log_pmtu = ilog2(ah->pmtu); I don’t know if UD is widely used in IPoIB or not. But won't you have any performance impact by doing ilog2 on every work request? Just curious. > atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); > } > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c > b/drivers/infiniband/ulp/ipoib/ipoib_main.c > index cf291f9..49af25f 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c > @@ -2257,7 +2257,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv > *priv, struct ib_device *hca) > goto device_init_failed; > } > > - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); > + priv->max_ib_mtu = rdma_mtu_enum_to_int(hca, port, attr.max_mtu); > > /* MTU will be reset when mcast join happens */ > priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); diff --git > a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > index 6709328..025cb07 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c > @@ -243,10 +243,14 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast > *mcast, > if (priv->mcast_mtu == priv->admin_mtu) > priv->admin_mtu = > priv->mcast_mtu = > - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast- > >mcmember.mtu)); > + IPOIB_UD_MTU(rdma_mtu_enum_to_int(priv->ca, > + priv->port, > + priv->broadcast- > >mcmember.mtu)); > else > priv->mcast_mtu = > - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast- > >mcmember.mtu)); > + IPOIB_UD_MTU(rdma_mtu_enum_to_int(priv->ca, > + priv->port, > + priv->broadcast- > >mcmember.mtu)); > > priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); > spin_unlock_irq(&priv->lock); > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index > e849bd0..b1fd9b1 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -395,6 +395,11 @@ enum ib_mtu { > IB_MTU_4096 = 5 > }; > > +enum opa_mtu { > + OPA_MTU_8192 = 6, > + OPA_MTU_10240 = 7 > +}; > + > static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { > switch (mtu) { > @@ -421,6 +426,24 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int > mtu) > return IB_MTU_256; > } > > +static inline int opa_mtu_enum_to_int(enum opa_mtu mtu) { > + switch (mtu) { > + case OPA_MTU_8192: return 8192; > + case OPA_MTU_10240: return 10240; > + default: return(ib_mtu_enum_to_int((enum ib_mtu) mtu)); > + } > +} > + > +static inline enum opa_mtu opa_mtu_int_to_enum(int mtu) { > + if (mtu >= 10240) > + return OPA_MTU_10240; > + else if (mtu >= 8192) > + return OPA_MTU_8192; > + else return ((enum opa_mtu) ib_mtu_int_to_enum(mtu)); } > + > enum ib_port_state { > IB_PORT_NOP = 0, > IB_PORT_DOWN = 1, > @@ -2997,6 +3020,31 @@ static inline bool rdma_cap_read_inv(struct > ib_device *dev, u32 port_num) > return rdma_protocol_iwarp(dev, port_num); } > > +/** > + * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not. > + * > + * @device: Device > + * @port_num: 1 based Port number > + * > + * Return true if the port is an Intel OPA port false if not. > + */ > + > +static inline bool rdma_core_cap_opa_port(struct ib_device *device, > + u32 port_num) > +{ > + return (device->port_immutable[port_num].core_cap_flags & > RDMA_CORE_PORT_INTEL_OPA) > + == RDMA_CORE_PORT_INTEL_OPA; > +} > + > +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, > + int mtu) > +{ > + if (rdma_core_cap_opa_port(device, port)) > + return opa_mtu_enum_to_int((enum opa_mtu) mtu); > + else > + return ib_mtu_enum_to_int((enum ib_mtu) mtu); } > + Above rdma function needs kdoc comment block, mainly to describe what is mtu as its typecasted internally.
On 5/23/2018 12:48 PM, Parav Pandit wrote: >> - log_pmtu = ah->log_pmtu; >> + log_pmtu = ilog2(ah->pmtu); > I don’t know if UD is widely used in IPoIB or not. > But won't you have any performance impact by doing ilog2 on every work request? > Just curious. It might just find more use with a larger MTU that allows better performance. As to the ilog2 impact, it shouldn't be bad because while this is in the data path, it's not in the "hot" data path. >> + >> +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, >> + int mtu) >> +{ >> + if (rdma_core_cap_opa_port(device, port)) >> + return opa_mtu_enum_to_int((enum opa_mtu) mtu); >> + else >> + return ib_mtu_enum_to_int((enum ib_mtu) mtu); } >> + > > Above rdma function needs kdoc comment block, mainly to describe what is mtu as its typecasted internally. Sounds reasonable. We'll touch that up and send a v2. -Denny -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 23, 2018 at 02:08:19PM -0400, Dennis Dalessandro wrote: > On 5/23/2018 12:48 PM, Parav Pandit wrote: > >>- log_pmtu = ah->log_pmtu; > >>+ log_pmtu = ilog2(ah->pmtu); > >I don’t know if UD is widely used in IPoIB or not. > >But won't you have any performance impact by doing ilog2 on every work request? > >Just curious. > > It might just find more use with a larger MTU that allows better > performance. At least on mlx5 ipoib and netdev ip o ethernet performance are basically the same. The way to get good performance is to implement the IP offloads. As I've said before, if OPA cares about good ipoib performance then the hfi driver needs to implement the rdma_netdev interface for ipoib as well as for opa_vnic. But, increasing the allowed MTU is a reasonablish idea too.. Jason -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 5/23/2018 3:44 PM, Jason Gunthorpe wrote: > On Wed, May 23, 2018 at 02:08:19PM -0400, Dennis Dalessandro wrote: >> On 5/23/2018 12:48 PM, Parav Pandit wrote: >>>> - log_pmtu = ah->log_pmtu; >>>> + log_pmtu = ilog2(ah->pmtu); >>> I don’t know if UD is widely used in IPoIB or not. >>> But won't you have any performance impact by doing ilog2 on every work request? >>> Just curious. >> >> It might just find more use with a larger MTU that allows better >> performance. > > At least on mlx5 ipoib and netdev ip o ethernet performance are > basically the same. The way to get good performance is to implement > the IP offloads. > > As I've said before, if OPA cares about good ipoib performance then > the hfi driver needs to implement the rdma_netdev interface for ipoib > as well as for opa_vnic. > > But, increasing the allowed MTU is a reasonablish idea too.. > > Jason > Actually, we've discussed this a bit more internally, let's go ahead and drop this patch, for now at least. Thanks -Denny -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index dd84238..de1b02a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1721,6 +1721,12 @@ static inline int valid_ib_mtu(unsigned int mtu) mtu == 4096; } +static inline int valid_opa_mtu(unsigned int mtu) +{ + return valid_ib_mtu(mtu) || mtu == 8192 || + mtu == 10240; +} + static inline int valid_opa_max_mtu(unsigned int mtu) { return mtu >= 2048 && diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1697d96..977bacd 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -168,15 +168,6 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -static inline int opa_mtu_enum_to_int(int mtu) -{ - switch (mtu) { - case OPA_MTU_8192: return 8192; - case OPA_MTU_10240: return 10240; - default: return -1; - } -} - /** * This function is what we would push to the core layer if we wanted to be a * "first class citizen". Instead we hide this here and rely on Verbs ULPs @@ -184,15 +175,9 @@ static inline int opa_mtu_enum_to_int(int mtu) */ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) { - int val; - - /* Constraining 10KB packets to 8KB packets */ if (mtu == (enum ib_mtu)OPA_MTU_10240) mtu = OPA_MTU_8192; - val = opa_mtu_enum_to_int((int)mtu); - if (val > 0) - return val; - return ib_mtu_enum_to_int(mtu); + return opa_mtu_enum_to_int((enum opa_mtu)mtu); } int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, @@ -308,13 +293,13 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, break; case IB_QPT_SMI: ah = ibah_to_rvtah(wqe->ud_wr.ah); - if (wqe->length > (1 << ah->log_pmtu)) + if (wqe->length > ah->pmtu) return -EINVAL; break; case IB_QPT_GSI: case IB_QPT_UD: ah = ibah_to_rvtah(wqe->ud_wr.ah); - if (wqe->length > (1 << ah->log_pmtu)) + if (wqe->length > ah->pmtu) return -EINVAL; if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) return -EINVAL; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index fc2e44c..ab67f8d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1481,9 +1481,10 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, * from the Path Records to us will get the new 8k MTU. Those that * attempt to process the MTU enum may fail in various ways. */ - props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ? - 4096 : hfi1_max_mtu), IB_MTU_4096); - props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : + props->max_mtu = mtu_to_enum((!valid_opa_mtu(hfi1_max_mtu) ? + 4096 : hfi1_max_mtu), + IB_MTU_4096); + props->active_mtu = !valid_opa_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); /* @@ -1617,7 +1618,7 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, dd = dd_from_ppd(ppd); ah->vl = sc_to_vlt(dd, sc5); if (ah->vl < num_vls || ah->vl == 15) - ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); + ah->pmtu = dd->vld[ah->vl].mtu; } /** diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 344e401..59f0093 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -402,7 +402,7 @@ int qib_check_send_wqe(struct rvt_qp *qp, case IB_QPT_GSI: case IB_QPT_UD: ah = ibah_to_rvtah(wqe->ud_wr.ah); - if (wqe->length > (1 << ah->log_pmtu)) + if (wqe->length > ah->pmtu) return -EINVAL; /* progress hint */ ret = 1; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 14b4057..c5d0ae4 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1367,7 +1367,7 @@ static void qib_notify_new_ah(struct ib_device *ibdev, ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)]; - ah->log_pmtu = ilog2(ppd->ibmtu); + ah->pmtu = ppd->ibmtu; } struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 6e9a351..00ddf97 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1835,7 +1835,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, qp->ibqp.qp_type != IB_QPT_RC) { struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); - log_pmtu = ah->log_pmtu; + log_pmtu = ilog2(ah->pmtu); atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index cf291f9..49af25f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2257,7 +2257,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) goto device_init_failed; } - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + priv->max_ib_mtu = rdma_mtu_enum_to_int(hca, port, attr.max_mtu); /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 6709328..025cb07 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -243,10 +243,14 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (priv->mcast_mtu == priv->admin_mtu) priv->admin_mtu = priv->mcast_mtu = - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); + IPOIB_UD_MTU(rdma_mtu_enum_to_int(priv->ca, + priv->port, + priv->broadcast->mcmember.mtu)); else priv->mcast_mtu = - IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); + IPOIB_UD_MTU(rdma_mtu_enum_to_int(priv->ca, + priv->port, + priv->broadcast->mcmember.mtu)); priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e849bd0..b1fd9b1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -395,6 +395,11 @@ enum ib_mtu { IB_MTU_4096 = 5 }; +enum opa_mtu { + OPA_MTU_8192 = 6, + OPA_MTU_10240 = 7 +}; + static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { switch (mtu) { @@ -421,6 +426,24 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu) return IB_MTU_256; } +static inline int opa_mtu_enum_to_int(enum opa_mtu mtu) +{ + switch (mtu) { + case OPA_MTU_8192: return 8192; + case OPA_MTU_10240: return 10240; + default: return(ib_mtu_enum_to_int((enum ib_mtu) mtu)); + } +} + +static inline enum opa_mtu opa_mtu_int_to_enum(int mtu) +{ + if (mtu >= 10240) + return OPA_MTU_10240; + else if (mtu >= 8192) + return OPA_MTU_8192; + else return ((enum opa_mtu) ib_mtu_int_to_enum(mtu)); +} + enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, @@ -2997,6 +3020,31 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } +/** + * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not. + * + * @device: Device + * @port_num: 1 based Port number + * + * Return true if the port is an Intel OPA port false if not. + */ + +static inline bool rdma_core_cap_opa_port(struct ib_device *device, + u32 port_num) +{ + return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_PORT_INTEL_OPA) + == RDMA_CORE_PORT_INTEL_OPA; +} + +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port, + int mtu) +{ + if (rdma_core_cap_opa_port(device, port)) + return opa_mtu_enum_to_int((enum opa_mtu) mtu); + else + return ib_mtu_enum_to_int((enum ib_mtu) mtu); +} + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr); diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index b4f0ac0..b3518e9 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -137,14 +137,6 @@ #define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) #define OPA_CAP_MASK3_IsVLrSupported (1 << 0) -/** - * new MTU values - */ -enum { - OPA_MTU_8192 = 6, - OPA_MTU_10240 = 7, -}; - enum { OPA_PORT_PHYS_CONF_DISCONNECTED = 0, OPA_PORT_PHYS_CONF_STANDARD = 1, diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index e79229a..a3d98cb 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -189,8 +189,8 @@ struct rvt_ah { struct ib_ah ibah; struct rdma_ah_attr attr; atomic_t refcount; + u16 pmtu; u8 vl; - u8 log_pmtu; }; struct rvt_dev_info;