diff mbox series

[v3,for-next,07/16] IB/ipoib: Increase ipoib Datagram mode MTU's upper limit

Message ID 20200511160618.173205.23053.stgit@awfm-01.aw.intel.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series New hfi1 feature: Accelerated IP | expand

Commit Message

Dennis Dalessandro May 11, 2020, 4:06 p.m. UTC
From: Kaike Wan <kaike.wan@intel.com>

Currently the ipoib UD mtu is restricted to 4K bytes. Remove this
limitation so that the IPOIB module can potentially use an MTU (in UD
mode) that is bounded by the MTU of the underlying device. A field is
added to the ib_port_attr structure to indicate the maximum physical
MTU the underlying device supports.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
---
 drivers/infiniband/hw/hfi1/qp.c                |   18 +-----
 drivers/infiniband/hw/hfi1/verbs.c             |    2 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |    2 -
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   11 ++-
 include/rdma/ib_verbs.h                        |   77 ++++++++++++++++++++++++
 include/rdma/opa_port_info.h                   |   10 ---
 6 files changed, 88 insertions(+), 32 deletions(-)

Comments

Nathan Chancellor May 27, 2020, 4:03 a.m. UTC | #1
On Mon, May 11, 2020 at 12:06:18PM -0400, Dennis Dalessandro wrote:
> From: Kaike Wan <kaike.wan@intel.com>
> 
> Currently the ipoib UD mtu is restricted to 4K bytes. Remove this
> limitation so that the IPOIB module can potentially use an MTU (in UD
> mode) that is bounded by the MTU of the underlying device. A field is
> added to the ib_port_attr structure to indicate the maximum physical
> MTU the underlying device supports.
> 
> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
> Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
> Signed-off-by: Kaike Wan <kaike.wan@intel.com>
> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
> ---
>  drivers/infiniband/hw/hfi1/qp.c                |   18 +-----
>  drivers/infiniband/hw/hfi1/verbs.c             |    2 +
>  drivers/infiniband/ulp/ipoib/ipoib_main.c      |    2 -
>  drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   11 ++-
>  include/rdma/ib_verbs.h                        |   77 ++++++++++++++++++++++++
>  include/rdma/opa_port_info.h                   |   10 ---
>  6 files changed, 88 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
> index f8e733a..0c2ae9f 100644
> --- a/drivers/infiniband/hw/hfi1/qp.c
> +++ b/drivers/infiniband/hw/hfi1/qp.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright(c) 2015 - 2019 Intel Corporation.
> + * Copyright(c) 2015 - 2020 Intel Corporation.
>   *
>   * This file is provided under a dual BSD/GPLv2 license.  When using or
>   * redistributing this file, you may do so under either license.
> @@ -186,15 +186,6 @@ static void flush_iowait(struct rvt_qp *qp)
>  	write_sequnlock_irqrestore(lock, flags);
>  }
>  
> -static inline int opa_mtu_enum_to_int(int mtu)
> -{
> -	switch (mtu) {
> -	case OPA_MTU_8192:  return 8192;
> -	case OPA_MTU_10240: return 10240;
> -	default:            return -1;
> -	}
> -}
> -
>  /**
>   * This function is what we would push to the core layer if we wanted to be a
>   * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
> @@ -202,15 +193,10 @@ static inline int opa_mtu_enum_to_int(int mtu)
>   */
>  static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
>  {
> -	int val;
> -
>  	/* Constraining 10KB packets to 8KB packets */
>  	if (mtu == (enum ib_mtu)OPA_MTU_10240)
>  		mtu = OPA_MTU_8192;
> -	val = opa_mtu_enum_to_int((int)mtu);
> -	if (val > 0)
> -		return val;
> -	return ib_mtu_enum_to_int(mtu);
> +	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
>  }
>  
>  int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
> diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
> index c61b291..19d5d00 100644
> --- a/drivers/infiniband/hw/hfi1/verbs.c
> +++ b/drivers/infiniband/hw/hfi1/verbs.c
> @@ -1439,6 +1439,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
>  				      4096 : hfi1_max_mtu), IB_MTU_4096);
>  	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
>  		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
> +	props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
> +				ib_mtu_enum_to_int(props->max_mtu);
>  
>  	return 0;
>  }
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index d4c6a97..22216f1 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -1855,7 +1855,7 @@ static int ipoib_parent_init(struct net_device *ndev)
>  			priv->port);
>  		return result;
>  	}
> -	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
> +	priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr);
>  
>  	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
>  	if (result) {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> index b9e9562..7166ee9b 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> @@ -218,6 +218,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>  	struct rdma_ah_attr av;
>  	int ret;
>  	int set_qkey = 0;
> +	int mtu;
>  
>  	mcast->mcmember = *mcmember;
>  
> @@ -240,13 +241,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>  		priv->broadcast->mcmember.flow_label = mcmember->flow_label;
>  		priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
>  		/* assume if the admin and the mcast are the same both can be changed */
> +		mtu = rdma_mtu_enum_to_int(priv->ca,  priv->port,
> +					   priv->broadcast->mcmember.mtu);
>  		if (priv->mcast_mtu == priv->admin_mtu)
> -			priv->admin_mtu =
> -			priv->mcast_mtu =
> -			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
> -		else
> -			priv->mcast_mtu =
> -			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
> +			priv->admin_mtu = IPOIB_UD_MTU(mtu);
> +		priv->mcast_mtu = IPOIB_UD_MTU(mtu);
>  
>  		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
>  		spin_unlock_irq(&priv->lock);
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 4b23ee5..792fb72 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -462,6 +462,11 @@ enum ib_mtu {
>  	IB_MTU_4096 = 5
>  };
>  
> +enum opa_mtu {
> +	OPA_MTU_8192 = 6,
> +	OPA_MTU_10240 = 7
> +};
> +
>  static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
>  {
>  	switch (mtu) {
> @@ -488,6 +493,28 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
>  		return IB_MTU_256;
>  }
>  
> +static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
> +{
> +	switch (mtu) {
> +	case OPA_MTU_8192:
> +		return 8192;
> +	case OPA_MTU_10240:
> +		return 10240;
> +	default:
> +		return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
> +	}
> +}
> +
> +static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
> +{
> +	if (mtu >= 10240)
> +		return OPA_MTU_10240;
> +	else if (mtu >= 8192)
> +		return OPA_MTU_8192;
> +	else
> +		return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
> +}
> +
>  enum ib_port_state {
>  	IB_PORT_NOP		= 0,
>  	IB_PORT_DOWN		= 1,
> @@ -651,6 +678,7 @@ struct ib_port_attr {
>  	enum ib_port_state	state;
>  	enum ib_mtu		max_mtu;
>  	enum ib_mtu		active_mtu;
> +	u32                     phys_mtu;
>  	int			gid_tbl_len;
>  	unsigned int		ip_gids:1;
>  	/* This is the value from PortInfo CapabilityMask, defined by IBA */
> @@ -3364,6 +3392,55 @@ static inline unsigned int rdma_find_pg_bit(unsigned long addr,
>  	return __fls(pgsz);
>  }
>  
> +/**
> + * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
> + * @device: Device
> + * @port_num: 1 based Port number
> + *
> + * Return true if port is an Intel OPA port , false if not
> + */
> +static inline bool rdma_core_cap_opa_port(struct ib_device *device,
> +					  u32 port_num)
> +{
> +	return (device->port_data[port_num].immutable.core_cap_flags &
> +		RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
> +}
> +
> +/**
> + * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
> + * @device: Device
> + * @port_num: Port number
> + * @mtu: enum value of MTU
> + *
> + * Return the MTU size supported by the port as an integer value. Will return
> + * -1 if enum value of mtu is not supported.
> + */
> +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
> +				       int mtu)
> +{
> +	if (rdma_core_cap_opa_port(device, port))
> +		return opa_mtu_enum_to_int((enum opa_mtu)mtu);
> +	else
> +		return ib_mtu_enum_to_int((enum ib_mtu)mtu);
> +}
> +
> +/**
> + * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
> + * @device: Device
> + * @port_num: Port number
> + * @attr: port attribute
> + *
> + * Return the MTU size supported by the port as an integer value.
> + */
> +static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
> +				     struct ib_port_attr *attr)
> +{
> +	if (rdma_core_cap_opa_port(device, port))
> +		return attr->phys_mtu;
> +	else
> +		return ib_mtu_enum_to_int(attr->max_mtu);
> +}
> +
>  int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
>  			 int state);
>  int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
> diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
> index bdbfe25..0d9e6d7 100644
> --- a/include/rdma/opa_port_info.h
> +++ b/include/rdma/opa_port_info.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2014-2017 Intel Corporation.  All rights reserved.
> + * Copyright (c) 2014-2020 Intel Corporation.  All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
>   * licenses.  You may choose to be licensed under the terms of the GNU
> @@ -139,14 +139,6 @@
>  #define OPA_CAP_MASK3_IsVLMarkerSupported         (1 << 1)
>  #define OPA_CAP_MASK3_IsVLrSupported              (1 << 0)
>  
> -/**
> - * new MTU values
> - */
> -enum {
> -	OPA_MTU_8192  = 6,
> -	OPA_MTU_10240 = 7,
> -};
> -
>  enum {
>  	OPA_PORT_PHYS_CONF_DISCONNECTED = 0,
>  	OPA_PORT_PHYS_CONF_STANDARD     = 1,
> 

This patch introduces a new clang warning:

drivers/infiniband/hw/hfi1/qp.c:198:9: warning: implicit conversion from enumeration type 'enum opa_mtu' to different enumeration type 'enum ib_mtu' [-Wenum-conversion]
                mtu = OPA_MTU_8192;
                    ~ ^~~~~~~~~~~~
1 warning generated.

The simple solution is obviously to just remove the cast but then I
noticed all of the explicit casts along with the casts in the conversion
functions. What is the point of having these values as enums then
explicitly casting them to others? Doesn't that just remove the type
safety that enums offer? Wouldn't it just be better to have preprocessor
defines? I noticed that OPA_MTU_0 to OPA_MTU_4096 are done that way but
then OPA_MTU_8192 and OPA_MTU_10240 are defined as enumerators... I
spent at least 15 minutes looking into how to untangle all of this and
just decided to sent this email instead. I am happy to just send a patch
shutting up this warning with a cast but I think the better long term
solution is to just consolidate these enums/functions into something
less unweildy.

Cheers,
Nathan
Dennis Dalessandro June 1, 2020, 1:48 p.m. UTC | #2
On 5/27/2020 12:03 AM, Nathan Chancellor wrote:
> On Mon, May 11, 2020 at 12:06:18PM -0400, Dennis Dalessandro wrote:
>> From: Kaike Wan <kaike.wan@intel.com>
>>
>> Currently the ipoib UD mtu is restricted to 4K bytes. Remove this
>> limitation so that the IPOIB module can potentially use an MTU (in UD
>> mode) that is bounded by the MTU of the underlying device. A field is
>> added to the ib_port_attr structure to indicate the maximum physical
>> MTU the underlying device supports.
>>
>> Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
>> Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
>> Signed-off-by: Kaike Wan <kaike.wan@intel.com>
>> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
>> ---
>>   drivers/infiniband/hw/hfi1/qp.c                |   18 +-----
>>   drivers/infiniband/hw/hfi1/verbs.c             |    2 +
>>   drivers/infiniband/ulp/ipoib/ipoib_main.c      |    2 -
>>   drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   11 ++-
>>   include/rdma/ib_verbs.h                        |   77 ++++++++++++++++++++++++
>>   include/rdma/opa_port_info.h                   |   10 ---
>>   6 files changed, 88 insertions(+), 32 deletions(-)
>>
>> diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
>> index f8e733a..0c2ae9f 100644
>> --- a/drivers/infiniband/hw/hfi1/qp.c
>> +++ b/drivers/infiniband/hw/hfi1/qp.c
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright(c) 2015 - 2019 Intel Corporation.
>> + * Copyright(c) 2015 - 2020 Intel Corporation.
>>    *
>>    * This file is provided under a dual BSD/GPLv2 license.  When using or
>>    * redistributing this file, you may do so under either license.
>> @@ -186,15 +186,6 @@ static void flush_iowait(struct rvt_qp *qp)
>>   	write_sequnlock_irqrestore(lock, flags);
>>   }
>>   
>> -static inline int opa_mtu_enum_to_int(int mtu)
>> -{
>> -	switch (mtu) {
>> -	case OPA_MTU_8192:  return 8192;
>> -	case OPA_MTU_10240: return 10240;
>> -	default:            return -1;
>> -	}
>> -}
>> -
>>   /**
>>    * This function is what we would push to the core layer if we wanted to be a
>>    * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
>> @@ -202,15 +193,10 @@ static inline int opa_mtu_enum_to_int(int mtu)
>>    */
>>   static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
>>   {
>> -	int val;
>> -
>>   	/* Constraining 10KB packets to 8KB packets */
>>   	if (mtu == (enum ib_mtu)OPA_MTU_10240)
>>   		mtu = OPA_MTU_8192;
>> -	val = opa_mtu_enum_to_int((int)mtu);
>> -	if (val > 0)
>> -		return val;
>> -	return ib_mtu_enum_to_int(mtu);
>> +	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
>>   }
>>   
>>   int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
>> diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
>> index c61b291..19d5d00 100644
>> --- a/drivers/infiniband/hw/hfi1/verbs.c
>> +++ b/drivers/infiniband/hw/hfi1/verbs.c
>> @@ -1439,6 +1439,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
>>   				      4096 : hfi1_max_mtu), IB_MTU_4096);
>>   	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
>>   		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
>> +	props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
>> +				ib_mtu_enum_to_int(props->max_mtu);
>>   
>>   	return 0;
>>   }
>> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
>> index d4c6a97..22216f1 100644
>> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
>> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
>> @@ -1855,7 +1855,7 @@ static int ipoib_parent_init(struct net_device *ndev)
>>   			priv->port);
>>   		return result;
>>   	}
>> -	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
>> +	priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr);
>>   
>>   	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
>>   	if (result) {
>> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>> index b9e9562..7166ee9b 100644
>> --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>> +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
>> @@ -218,6 +218,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>>   	struct rdma_ah_attr av;
>>   	int ret;
>>   	int set_qkey = 0;
>> +	int mtu;
>>   
>>   	mcast->mcmember = *mcmember;
>>   
>> @@ -240,13 +241,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
>>   		priv->broadcast->mcmember.flow_label = mcmember->flow_label;
>>   		priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
>>   		/* assume if the admin and the mcast are the same both can be changed */
>> +		mtu = rdma_mtu_enum_to_int(priv->ca,  priv->port,
>> +					   priv->broadcast->mcmember.mtu);
>>   		if (priv->mcast_mtu == priv->admin_mtu)
>> -			priv->admin_mtu =
>> -			priv->mcast_mtu =
>> -			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
>> -		else
>> -			priv->mcast_mtu =
>> -			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
>> +			priv->admin_mtu = IPOIB_UD_MTU(mtu);
>> +		priv->mcast_mtu = IPOIB_UD_MTU(mtu);
>>   
>>   		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
>>   		spin_unlock_irq(&priv->lock);
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index 4b23ee5..792fb72 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -462,6 +462,11 @@ enum ib_mtu {
>>   	IB_MTU_4096 = 5
>>   };
>>   
>> +enum opa_mtu {
>> +	OPA_MTU_8192 = 6,
>> +	OPA_MTU_10240 = 7
>> +};
>> +
>>   static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
>>   {
>>   	switch (mtu) {
>> @@ -488,6 +493,28 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
>>   		return IB_MTU_256;
>>   }
>>   
>> +static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
>> +{
>> +	switch (mtu) {
>> +	case OPA_MTU_8192:
>> +		return 8192;
>> +	case OPA_MTU_10240:
>> +		return 10240;
>> +	default:
>> +		return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
>> +	}
>> +}
>> +
>> +static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
>> +{
>> +	if (mtu >= 10240)
>> +		return OPA_MTU_10240;
>> +	else if (mtu >= 8192)
>> +		return OPA_MTU_8192;
>> +	else
>> +		return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
>> +}
>> +
>>   enum ib_port_state {
>>   	IB_PORT_NOP		= 0,
>>   	IB_PORT_DOWN		= 1,
>> @@ -651,6 +678,7 @@ struct ib_port_attr {
>>   	enum ib_port_state	state;
>>   	enum ib_mtu		max_mtu;
>>   	enum ib_mtu		active_mtu;
>> +	u32                     phys_mtu;
>>   	int			gid_tbl_len;
>>   	unsigned int		ip_gids:1;
>>   	/* This is the value from PortInfo CapabilityMask, defined by IBA */
>> @@ -3364,6 +3392,55 @@ static inline unsigned int rdma_find_pg_bit(unsigned long addr,
>>   	return __fls(pgsz);
>>   }
>>   
>> +/**
>> + * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
>> + * @device: Device
>> + * @port_num: 1 based Port number
>> + *
>> + * Return true if port is an Intel OPA port , false if not
>> + */
>> +static inline bool rdma_core_cap_opa_port(struct ib_device *device,
>> +					  u32 port_num)
>> +{
>> +	return (device->port_data[port_num].immutable.core_cap_flags &
>> +		RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
>> +}
>> +
>> +/**
>> + * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
>> + * @device: Device
>> + * @port_num: Port number
>> + * @mtu: enum value of MTU
>> + *
>> + * Return the MTU size supported by the port as an integer value. Will return
>> + * -1 if enum value of mtu is not supported.
>> + */
>> +static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
>> +				       int mtu)
>> +{
>> +	if (rdma_core_cap_opa_port(device, port))
>> +		return opa_mtu_enum_to_int((enum opa_mtu)mtu);
>> +	else
>> +		return ib_mtu_enum_to_int((enum ib_mtu)mtu);
>> +}
>> +
>> +/**
>> + * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
>> + * @device: Device
>> + * @port_num: Port number
>> + * @attr: port attribute
>> + *
>> + * Return the MTU size supported by the port as an integer value.
>> + */
>> +static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
>> +				     struct ib_port_attr *attr)
>> +{
>> +	if (rdma_core_cap_opa_port(device, port))
>> +		return attr->phys_mtu;
>> +	else
>> +		return ib_mtu_enum_to_int(attr->max_mtu);
>> +}
>> +
>>   int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
>>   			 int state);
>>   int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
>> diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
>> index bdbfe25..0d9e6d7 100644
>> --- a/include/rdma/opa_port_info.h
>> +++ b/include/rdma/opa_port_info.h
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright (c) 2014-2017 Intel Corporation.  All rights reserved.
>> + * Copyright (c) 2014-2020 Intel Corporation.  All rights reserved.
>>    *
>>    * This software is available to you under a choice of one of two
>>    * licenses.  You may choose to be licensed under the terms of the GNU
>> @@ -139,14 +139,6 @@
>>   #define OPA_CAP_MASK3_IsVLMarkerSupported         (1 << 1)
>>   #define OPA_CAP_MASK3_IsVLrSupported              (1 << 0)
>>   
>> -/**
>> - * new MTU values
>> - */
>> -enum {
>> -	OPA_MTU_8192  = 6,
>> -	OPA_MTU_10240 = 7,
>> -};
>> -
>>   enum {
>>   	OPA_PORT_PHYS_CONF_DISCONNECTED = 0,
>>   	OPA_PORT_PHYS_CONF_STANDARD     = 1,
>>
> 
> This patch introduces a new clang warning:
> 
> drivers/infiniband/hw/hfi1/qp.c:198:9: warning: implicit conversion from enumeration type 'enum opa_mtu' to different enumeration type 'enum ib_mtu' [-Wenum-conversion]
>                  mtu = OPA_MTU_8192;
>                      ~ ^~~~~~~~~~~~
> 1 warning generated.
> 
> The simple solution is obviously to just remove the cast but then I
> noticed all of the explicit casts along with the casts in the conversion
> functions. What is the point of having these values as enums then
> explicitly casting them to others? Doesn't that just remove the type
> safety that enums offer? Wouldn't it just be better to have preprocessor
> defines? I noticed that OPA_MTU_0 to OPA_MTU_4096 are done that way but
> then OPA_MTU_8192 and OPA_MTU_10240 are defined as enumerators... I
> spent at least 15 minutes looking into how to untangle all of this and
> just decided to sent this email instead. I am happy to just send a patch
> shutting up this warning with a cast but I think the better long term
> solution is to just consolidate these enums/functions into something
> less unweildy.
> 
> Cheers,
> Nathan
> 

They should probably all be in "enum ib_mtu". Jason any issues with us 
donig that? I can't for certain recall the real reason they were kept 
separate in the first place.

-Denny
Jason Gunthorpe June 1, 2020, 1:57 p.m. UTC | #3
On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:

> They should probably all be in "enum ib_mtu". Jason any issues with us donig
> that? I can't for certain recall the real reason they were kept separate in
> the first place.

It is probably OK

Jason
Nathan Chancellor June 16, 2020, 12:56 a.m. UTC | #4
On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
> On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
> 
> > They should probably all be in "enum ib_mtu". Jason any issues with us donig
> > that? I can't for certain recall the real reason they were kept separate in
> > the first place.
> 
> It is probably OK
> 
> Jason

I don't mind taking a wack at this if you guys are too busy (I'm rather
tired of seeing the warning across all of my builds). However, I am
wondering how far should this be unwound? Should 'enum opa_mtu' be
collapsed into 'enum ib_mtu' and then all of the opa conversion
functions be eliminated in favor of the ib ones? It looks like
OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
drivers/infiniband/hw/hfi1, should all of those instances be converted
over to IB_MTU_* and the defines at the top of
drivers/infiniband/hw/hfi1/hfi.h be eliminated?

Cheers,
Nathan
Leon Romanovsky June 16, 2020, 6:25 a.m. UTC | #5
On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
> On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
> > On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
> >
> > > They should probably all be in "enum ib_mtu". Jason any issues with us donig
> > > that? I can't for certain recall the real reason they were kept separate in
> > > the first place.
> >
> > It is probably OK
> >
> > Jason
>
> I don't mind taking a wack at this if you guys are too busy (I'm rather
> tired of seeing the warning across all of my builds). However, I am
> wondering how far should this be unwound? Should 'enum opa_mtu' be
> collapsed into 'enum ib_mtu' and then all of the opa conversion
> functions be eliminated in favor of the ib ones? It looks like
> OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
> drivers/infiniband/hw/hfi1, should all of those instances be converted
> over to IB_MTU_* and the defines at the top of
> drivers/infiniband/hw/hfi1/hfi.h be eliminated?

We rather keep separation due to logic separation.

While ib_* defines come from IBTA and interoperable across different
devices and vendors, opa_* definitions are Intel proprietary ones used
for the product that was canceled.

Thanks

>
> Cheers,
> Nathan
Nathan Chancellor June 16, 2020, 6:42 p.m. UTC | #6
On Tue, Jun 16, 2020 at 09:25:34AM +0300, Leon Romanovsky wrote:
> On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
> > On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
> > > On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
> > >
> > > > They should probably all be in "enum ib_mtu". Jason any issues with us donig
> > > > that? I can't for certain recall the real reason they were kept separate in
> > > > the first place.
> > >
> > > It is probably OK
> > >
> > > Jason
> >
> > I don't mind taking a wack at this if you guys are too busy (I'm rather
> > tired of seeing the warning across all of my builds). However, I am
> > wondering how far should this be unwound? Should 'enum opa_mtu' be
> > collapsed into 'enum ib_mtu' and then all of the opa conversion
> > functions be eliminated in favor of the ib ones? It looks like
> > OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
> > drivers/infiniband/hw/hfi1, should all of those instances be converted
> > over to IB_MTU_* and the defines at the top of
> > drivers/infiniband/hw/hfi1/hfi.h be eliminated?
> 
> We rather keep separation due to logic separation.
> 
> While ib_* defines come from IBTA and interoperable across different
> devices and vendors, opa_* definitions are Intel proprietary ones used
> for the product that was canceled.
> 
> Thanks
> 
> >
> > Cheers,
> > Nathan

Fair enough, could someone take care of properly fixing the warning that
was introduced by this patch then? I can send a patch that just adds an
explicit cast but that looks like an eye sore to me. Otherwise, I am not
familiar enough with this code to know what is an appropriate fix or
not.

Cheers,
Nathan
Dennis Dalessandro June 16, 2020, 7:14 p.m. UTC | #7
On 6/16/2020 2:25 AM, Leon Romanovsky wrote:
> On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
>> On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
>>> On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
>>>
>>>> They should probably all be in "enum ib_mtu". Jason any issues with us donig
>>>> that? I can't for certain recall the real reason they were kept separate in
>>>> the first place.
>>>
>>> It is probably OK
>>>
>>> Jason
>>
>> I don't mind taking a wack at this if you guys are too busy (I'm rather
>> tired of seeing the warning across all of my builds). However, I am
>> wondering how far should this be unwound? Should 'enum opa_mtu' be
>> collapsed into 'enum ib_mtu' and then all of the opa conversion
>> functions be eliminated in favor of the ib ones? It looks like
>> OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
>> drivers/infiniband/hw/hfi1, should all of those instances be converted
>> over to IB_MTU_* and the defines at the top of
>> drivers/infiniband/hw/hfi1/hfi.h be eliminated?

My opinion is yes.

> We rather keep separation due to logic separation.

To be fair, "you" rather. Not we. I'd like some others to weigh in here. 
Increasing the available MTUs an an enum just makes sense. Why does it 
matter if IB doesn't need them right now. Maybe someday.

> While ib_* defines come from IBTA and interoperable across different
> devices and vendors, opa_* definitions are Intel proprietary ones used
> for the product that was canceled.

But does it hurt to have more potentially available? Can you please 
explain the technical reason here?

-Denny
Ira Weiny June 16, 2020, 7:17 p.m. UTC | #8
On Tue, Jun 16, 2020 at 11:42:31AM -0700, Nathan Chancellor wrote:
> On Tue, Jun 16, 2020 at 09:25:34AM +0300, Leon Romanovsky wrote:
> > On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
> > > On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
> > > > On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
> > > >
> > > > > They should probably all be in "enum ib_mtu". Jason any issues with us donig
> > > > > that? I can't for certain recall the real reason they were kept separate in
> > > > > the first place.
> > > >
> > > > It is probably OK
> > > >
> > > > Jason
> > >
> > > I don't mind taking a wack at this if you guys are too busy (I'm rather
> > > tired of seeing the warning across all of my builds). However, I am
> > > wondering how far should this be unwound? Should 'enum opa_mtu' be
> > > collapsed into 'enum ib_mtu' and then all of the opa conversion
> > > functions be eliminated in favor of the ib ones? It looks like
> > > OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
> > > drivers/infiniband/hw/hfi1, should all of those instances be converted
> > > over to IB_MTU_* and the defines at the top of
> > > drivers/infiniband/hw/hfi1/hfi.h be eliminated?
> > 
> > We rather keep separation due to logic separation.
> > 
> > While ib_* defines come from IBTA and interoperable across different
> > devices and vendors, opa_* definitions are Intel proprietary ones used
> > for the product that was canceled.
> > 
> > Thanks
> > 
> > >
> > > Cheers,
> > > Nathan
> 
> Fair enough, could someone take care of properly fixing the warning that
> was introduced by this patch then? I can send a patch that just adds an
> explicit cast but that looks like an eye sore to me. Otherwise, I am not
> familiar enough with this code to know what is an appropriate fix or
> not.

Leon is correct.  opa_* values should only be used on devices which are opa.

IPoIB needs to be 'opa aware' to do the right thing (be more efficient in this
case) when running on an OPA device.

Is that the case with the current code?

Ira

> 
> Cheers,
> Nathan
Ira Weiny June 16, 2020, 7:21 p.m. UTC | #9
On Tue, Jun 16, 2020 at 03:14:51PM -0400, Dennis Dalessandro wrote:
> On 6/16/2020 2:25 AM, Leon Romanovsky wrote:
> > On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
> > > On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
> > > > On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
> > > > 
> > > > > They should probably all be in "enum ib_mtu". Jason any issues with us donig
> > > > > that? I can't for certain recall the real reason they were kept separate in
> > > > > the first place.
> > > > 
> > > > It is probably OK
> > > > 
> > > > Jason
> > > 
> > > I don't mind taking a wack at this if you guys are too busy (I'm rather
> > > tired of seeing the warning across all of my builds). However, I am
> > > wondering how far should this be unwound? Should 'enum opa_mtu' be
> > > collapsed into 'enum ib_mtu' and then all of the opa conversion
> > > functions be eliminated in favor of the ib ones? It looks like
> > > OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
> > > drivers/infiniband/hw/hfi1, should all of those instances be converted
> > > over to IB_MTU_* and the defines at the top of
> > > drivers/infiniband/hw/hfi1/hfi.h be eliminated?
> 
> My opinion is yes.
> 
> > We rather keep separation due to logic separation.
> 
> To be fair, "you" rather. Not we. I'd like some others to weigh in here.
> Increasing the available MTUs an an enum just makes sense. Why does it
> matter if IB doesn't need them right now. Maybe someday.
> 
> > While ib_* defines come from IBTA and interoperable across different
> > devices and vendors, opa_* definitions are Intel proprietary ones used
> > for the product that was canceled.
> 
> But does it hurt to have more potentially available? Can you please explain
> the technical reason here?

The problem is that the IBTA _may_ define those enums to mean something
different in the future.  Hopefully, the Intel representatives within the IBTA
would try to make them compatible but I don't know that 10K 'fits' with the
IBTA's future.  8K seems like a reasonable extension for the IBTA but again we
as Linux developers can't say that will happen for sure.  We just don't control
what the IBTA does in that regard.

Ira

> 
> -Denny
>
Dennis Dalessandro June 16, 2020, 7:27 p.m. UTC | #10
On 6/16/2020 3:21 PM, Ira Weiny wrote:
> On Tue, Jun 16, 2020 at 03:14:51PM -0400, Dennis Dalessandro wrote:
>> On 6/16/2020 2:25 AM, Leon Romanovsky wrote:
>>> On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
>>>> On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
>>>>> On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
>>>>>
>>>>>> They should probably all be in "enum ib_mtu". Jason any issues with us donig
>>>>>> that? I can't for certain recall the real reason they were kept separate in
>>>>>> the first place.
>>>>>
>>>>> It is probably OK
>>>>>
>>>>> Jason
>>>>
>>>> I don't mind taking a wack at this if you guys are too busy (I'm rather
>>>> tired of seeing the warning across all of my builds). However, I am
>>>> wondering how far should this be unwound? Should 'enum opa_mtu' be
>>>> collapsed into 'enum ib_mtu' and then all of the opa conversion
>>>> functions be eliminated in favor of the ib ones? It looks like
>>>> OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
>>>> drivers/infiniband/hw/hfi1, should all of those instances be converted
>>>> over to IB_MTU_* and the defines at the top of
>>>> drivers/infiniband/hw/hfi1/hfi.h be eliminated?
>>
>> My opinion is yes.
>>
>>> We rather keep separation due to logic separation.
>>
>> To be fair, "you" rather. Not we. I'd like some others to weigh in here.
>> Increasing the available MTUs an an enum just makes sense. Why does it
>> matter if IB doesn't need them right now. Maybe someday.
>>
>>> While ib_* defines come from IBTA and interoperable across different
>>> devices and vendors, opa_* definitions are Intel proprietary ones used
>>> for the product that was canceled.
>>
>> But does it hurt to have more potentially available? Can you please explain
>> the technical reason here?
> 
> The problem is that the IBTA _may_ define those enums to mean something
> different in the future.  Hopefully, the Intel representatives within the IBTA
> would try to make them compatible but I don't know that 10K 'fits' with the
> IBTA's future.  8K seems like a reasonable extension for the IBTA but again we
> as Linux developers can't say that will happen for sure.  We just don't control
> what the IBTA does in that regard.
> 

I guess I buy that. However I believe I have seen it claimed in the past 
that we aren't the IBTA, we are the Linux kernel and can do what we 
think makes sense. But to be honest I don't feel strongly, and I'm not 
gonna argue strongly one way or the other.

-Denny
Leon Romanovsky June 17, 2020, 4:35 a.m. UTC | #11
On Tue, Jun 16, 2020 at 03:27:34PM -0400, Dennis Dalessandro wrote:
> On 6/16/2020 3:21 PM, Ira Weiny wrote:
> > On Tue, Jun 16, 2020 at 03:14:51PM -0400, Dennis Dalessandro wrote:
> > > On 6/16/2020 2:25 AM, Leon Romanovsky wrote:
> > > > On Mon, Jun 15, 2020 at 05:56:50PM -0700, Nathan Chancellor wrote:
> > > > > On Mon, Jun 01, 2020 at 10:57:22AM -0300, Jason Gunthorpe wrote:
> > > > > > On Mon, Jun 01, 2020 at 09:48:47AM -0400, Dennis Dalessandro wrote:
> > > > > >
> > > > > > > They should probably all be in "enum ib_mtu". Jason any issues with us donig
> > > > > > > that? I can't for certain recall the real reason they were kept separate in
> > > > > > > the first place.
> > > > > >
> > > > > > It is probably OK
> > > > > >
> > > > > > Jason
> > > > >
> > > > > I don't mind taking a wack at this if you guys are too busy (I'm rather
> > > > > tired of seeing the warning across all of my builds). However, I am
> > > > > wondering how far should this be unwound? Should 'enum opa_mtu' be
> > > > > collapsed into 'enum ib_mtu' and then all of the opa conversion
> > > > > functions be eliminated in favor of the ib ones? It looks like
> > > > > OPA_MTU_8192 and OPA_MTU_10240 are used in a few places within
> > > > > drivers/infiniband/hw/hfi1, should all of those instances be converted
> > > > > over to IB_MTU_* and the defines at the top of
> > > > > drivers/infiniband/hw/hfi1/hfi.h be eliminated?
> > >
> > > My opinion is yes.
> > >
> > > > We rather keep separation due to logic separation.
> > >
> > > To be fair, "you" rather. Not we. I'd like some others to weigh in here.
> > > Increasing the available MTUs an an enum just makes sense. Why does it
> > > matter if IB doesn't need them right now. Maybe someday.
> > >
> > > > While ib_* defines come from IBTA and interoperable across different
> > > > devices and vendors, opa_* definitions are Intel proprietary ones used
> > > > for the product that was canceled.
> > >
> > > But does it hurt to have more potentially available? Can you please explain
> > > the technical reason here?
> >
> > The problem is that the IBTA _may_ define those enums to mean something
> > different in the future.  Hopefully, the Intel representatives within the IBTA
> > would try to make them compatible but I don't know that 10K 'fits' with the
> > IBTA's future.  8K seems like a reasonable extension for the IBTA but again we
> > as Linux developers can't say that will happen for sure.  We just don't control
> > what the IBTA does in that regard.
> >
>
> I guess I buy that. However I believe I have seen it claimed in the past
> that we aren't the IBTA, we are the Linux kernel and can do what we think
> makes sense. But to be honest I don't feel strongly, and I'm not gonna argue
> strongly one way or the other.

Thanks Ira for the explanation.

Regarding "we are the Linux kernel and can do what we think makes sense"
sentence, it is correct for SW interfaces and implementation only.
Everything that touches already defined HW interfaces and wire protocol
should follow the spec or should be separated.

Thanks

>
> -Denny
>
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index f8e733a..0c2ae9f 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright(c) 2015 - 2019 Intel Corporation.
+ * Copyright(c) 2015 - 2020 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -186,15 +186,6 @@  static void flush_iowait(struct rvt_qp *qp)
 	write_sequnlock_irqrestore(lock, flags);
 }
 
-static inline int opa_mtu_enum_to_int(int mtu)
-{
-	switch (mtu) {
-	case OPA_MTU_8192:  return 8192;
-	case OPA_MTU_10240: return 10240;
-	default:            return -1;
-	}
-}
-
 /**
  * This function is what we would push to the core layer if we wanted to be a
  * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
@@ -202,15 +193,10 @@  static inline int opa_mtu_enum_to_int(int mtu)
  */
 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 {
-	int val;
-
 	/* Constraining 10KB packets to 8KB packets */
 	if (mtu == (enum ib_mtu)OPA_MTU_10240)
 		mtu = OPA_MTU_8192;
-	val = opa_mtu_enum_to_int((int)mtu);
-	if (val > 0)
-		return val;
-	return ib_mtu_enum_to_int(mtu);
+	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
 }
 
 int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index c61b291..19d5d00 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1439,6 +1439,8 @@  static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 				      4096 : hfi1_max_mtu), IB_MTU_4096);
 	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+	props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu :
+				ib_mtu_enum_to_int(props->max_mtu);
 
 	return 0;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index d4c6a97..22216f1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1855,7 +1855,7 @@  static int ipoib_parent_init(struct net_device *ndev)
 			priv->port);
 		return result;
 	}
-	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+	priv->max_ib_mtu = rdma_mtu_from_attr(priv->ca, priv->port, &attr);
 
 	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
 	if (result) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index b9e9562..7166ee9b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -218,6 +218,7 @@  static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 	struct rdma_ah_attr av;
 	int ret;
 	int set_qkey = 0;
+	int mtu;
 
 	mcast->mcmember = *mcmember;
 
@@ -240,13 +241,11 @@  static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 		priv->broadcast->mcmember.flow_label = mcmember->flow_label;
 		priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
 		/* assume if the admin and the mcast are the same both can be changed */
+		mtu = rdma_mtu_enum_to_int(priv->ca,  priv->port,
+					   priv->broadcast->mcmember.mtu);
 		if (priv->mcast_mtu == priv->admin_mtu)
-			priv->admin_mtu =
-			priv->mcast_mtu =
-			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
-		else
-			priv->mcast_mtu =
-			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+			priv->admin_mtu = IPOIB_UD_MTU(mtu);
+		priv->mcast_mtu = IPOIB_UD_MTU(mtu);
 
 		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
 		spin_unlock_irq(&priv->lock);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4b23ee5..792fb72 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -462,6 +462,11 @@  enum ib_mtu {
 	IB_MTU_4096 = 5
 };
 
+enum opa_mtu {
+	OPA_MTU_8192 = 6,
+	OPA_MTU_10240 = 7
+};
+
 static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
 {
 	switch (mtu) {
@@ -488,6 +493,28 @@  static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
 		return IB_MTU_256;
 }
 
+static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
+{
+	switch (mtu) {
+	case OPA_MTU_8192:
+		return 8192;
+	case OPA_MTU_10240:
+		return 10240;
+	default:
+		return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
+	}
+}
+
+static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
+{
+	if (mtu >= 10240)
+		return OPA_MTU_10240;
+	else if (mtu >= 8192)
+		return OPA_MTU_8192;
+	else
+		return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
+}
+
 enum ib_port_state {
 	IB_PORT_NOP		= 0,
 	IB_PORT_DOWN		= 1,
@@ -651,6 +678,7 @@  struct ib_port_attr {
 	enum ib_port_state	state;
 	enum ib_mtu		max_mtu;
 	enum ib_mtu		active_mtu;
+	u32                     phys_mtu;
 	int			gid_tbl_len;
 	unsigned int		ip_gids:1;
 	/* This is the value from PortInfo CapabilityMask, defined by IBA */
@@ -3364,6 +3392,55 @@  static inline unsigned int rdma_find_pg_bit(unsigned long addr,
 	return __fls(pgsz);
 }
 
+/**
+ * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
+ * @device: Device
+ * @port_num: 1 based Port number
+ *
+ * Return true if port is an Intel OPA port , false if not
+ */
+static inline bool rdma_core_cap_opa_port(struct ib_device *device,
+					  u32 port_num)
+{
+	return (device->port_data[port_num].immutable.core_cap_flags &
+		RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
+}
+
+/**
+ * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
+ * @device: Device
+ * @port_num: Port number
+ * @mtu: enum value of MTU
+ *
+ * Return the MTU size supported by the port as an integer value. Will return
+ * -1 if enum value of mtu is not supported.
+ */
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
+				       int mtu)
+{
+	if (rdma_core_cap_opa_port(device, port))
+		return opa_mtu_enum_to_int((enum opa_mtu)mtu);
+	else
+		return ib_mtu_enum_to_int((enum ib_mtu)mtu);
+}
+
+/**
+ * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
+ * @device: Device
+ * @port_num: Port number
+ * @attr: port attribute
+ *
+ * Return the MTU size supported by the port as an integer value.
+ */
+static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
+				     struct ib_port_attr *attr)
+{
+	if (rdma_core_cap_opa_port(device, port))
+		return attr->phys_mtu;
+	else
+		return ib_mtu_enum_to_int(attr->max_mtu);
+}
+
 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
 			 int state);
 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index bdbfe25..0d9e6d7 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2014-2017 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2014-2020 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -139,14 +139,6 @@ 
 #define OPA_CAP_MASK3_IsVLMarkerSupported         (1 << 1)
 #define OPA_CAP_MASK3_IsVLrSupported              (1 << 0)
 
-/**
- * new MTU values
- */
-enum {
-	OPA_MTU_8192  = 6,
-	OPA_MTU_10240 = 7,
-};
-
 enum {
 	OPA_PORT_PHYS_CONF_DISCONNECTED = 0,
 	OPA_PORT_PHYS_CONF_STANDARD     = 1,