diff mbox series

[v2,for-next] RDMA/core: Get IB width and speed from netdev

Message ID 20230603063833.541682-1-huangjunxian6@hisilicon.com (mailing list archive)
State Superseded
Headers show
Series [v2,for-next] RDMA/core: Get IB width and speed from netdev | expand

Commit Message

Junxian Huang June 3, 2023, 6:38 a.m. UTC
From: Haoyue Xu <xuhaoyue1@hisilicon.com>

Previously, there was no way to query the number of lanes for a network
card, so the same netdev_speed would result in a fixed pair of width and
speed. As network card specifications become more diverse, such fixed
mode is no longer suitable, so a method is needed to obtain the correct
width and speed based on the number of lanes.

This patch retrieves netdev lanes and speed from net_device and
translates them to IB width and speed. Also, add a generic function
to translating netdev speed to IB speed.

Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
 drivers/infiniband/core/verbs.c | 19 +++++++++++++++++--
 include/rdma/ib_verbs.h         | 26 ++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)

Comments

Leon Romanovsky June 11, 2023, 5:46 p.m. UTC | #1
On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
> 
> Previously, there was no way to query the number of lanes for a network
> card, so the same netdev_speed would result in a fixed pair of width and
> speed. As network card specifications become more diverse, such fixed
> mode is no longer suitable, so a method is needed to obtain the correct
> width and speed based on the number of lanes.

I'm sorry but I didn't understand the problem statement. Can you please
provide an example of configuration that will give different results 
before this patch and after?

> 
> This patch retrieves netdev lanes and speed from net_device and
> translates them to IB width and speed. Also, add a generic function
> to translating netdev speed to IB speed.
> 
> Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
> Signed-off-by: Luoyouming <luoyouming@huawei.com>
> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
> ---
>  drivers/infiniband/core/verbs.c | 19 +++++++++++++++++--
>  include/rdma/ib_verbs.h         | 26 ++++++++++++++++++++++++++
>  2 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
> index b99b3cc283b6..35f1b670600a 100644
> --- a/drivers/infiniband/core/verbs.c
> +++ b/drivers/infiniband/core/verbs.c
> @@ -1880,11 +1880,19 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
>  }
>  EXPORT_SYMBOL(ib_modify_qp_with_udata);
>  
> +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
> +				   u16 *speed, u8 *width)
> +{
> +	*width = ib_int_to_ib_width(lanes);
> +	*speed = ib_eth_to_ib_speed(netdev_speed / lanes);
> +}
> +
>  int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>  {
>  	int rc;
>  	u32 netdev_speed;
>  	struct net_device *netdev;
> +	bool cap_link_lanes_supported;
>  	struct ethtool_link_ksettings lksettings;
>  
>  	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
> @@ -1896,16 +1904,23 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>  
>  	rtnl_lock();
>  	rc = __ethtool_get_link_ksettings(netdev, &lksettings);
> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
>  	rtnl_unlock();
>  
>  	dev_put(netdev);
>  
>  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
>  		netdev_speed = lksettings.base.speed;
> +		if (cap_link_lanes_supported && lksettings.lanes) {

According to the documentation cap_link_lanes_supported defines if
number of lanes can be supplied by user and I would expect from
__ethtool_get_link_ksettings() to get right numbers after it was
changed.

Thanks

> +			ib_get_width_and_speed(netdev_speed, lksettings.lanes,
> +					       speed, width);
> +			return 0;
> +		}
>  	} else {
>  		netdev_speed = SPEED_1000;
> -		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
> -			netdev_speed);
> +		if (rc)
> +			pr_warn("%s speed is unknown, defaulting to %u\n",
> +				netdev->name, netdev_speed);
>  	}
>  
>  	if (netdev_speed <= SPEED_1000) {
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 1e7774ac808f..7dc926ec7fee 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -552,6 +552,18 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
>  	}
>  }
>  
> +static inline int ib_int_to_ib_width(u32 lanes)
> +{
> +	switch (lanes) {
> +	case 1: return IB_WIDTH_1X;
> +	case 2: return IB_WIDTH_2X;
> +	case 4: return IB_WIDTH_4X;
> +	case 8: return IB_WIDTH_8X;
> +	case 12: return IB_WIDTH_12X;
> +	default: return IB_WIDTH_1X;
> +	}
> +}
> +
>  enum ib_port_speed {
>  	IB_SPEED_SDR	= 1,
>  	IB_SPEED_DDR	= 2,
> @@ -563,6 +575,20 @@ enum ib_port_speed {
>  	IB_SPEED_NDR	= 128,
>  };
>  
> +static inline int ib_eth_to_ib_speed(u32 speed)
> +{
> +	switch (speed) {
> +	case SPEED_2500: return IB_SPEED_SDR;
> +	case SPEED_5000: return IB_SPEED_DDR;
> +	case SPEED_10000: return IB_SPEED_FDR10;
> +	case SPEED_14000: return IB_SPEED_FDR;
> +	case SPEED_25000: return IB_SPEED_EDR;
> +	case SPEED_50000: return IB_SPEED_HDR;
> +	case SPEED_100000: return IB_SPEED_NDR;
> +	default: return IB_SPEED_SDR;
> +	}
> +}
> +
>  enum ib_stat_flag {
>  	IB_STAT_FLAG_OPTIONAL = 1 << 0,
>  };
> -- 
> 2.30.0
>
Junxian Huang June 19, 2023, 6:20 a.m. UTC | #2
On 2023/6/12 1:46, Leon Romanovsky wrote:
> On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
>> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>
>> Previously, there was no way to query the number of lanes for a network
>> card, so the same netdev_speed would result in a fixed pair of width and
>> speed. As network card specifications become more diverse, such fixed
>> mode is no longer suitable, so a method is needed to obtain the correct
>> width and speed based on the number of lanes.
> 
> I'm sorry but I didn't understand the problem statement. Can you please
> provide an example of configuration that will give different results 
> before this patch and after?
> 

I'll give examples with 20G and 200G netdevs respectively.

20G:
Before this patch, regardless of the actual number of lanes, the width and
speed displayed in ibv_devinfo would be always fixed:
	active_width: 4X
	active_speed: 5 Gbps
After this patch, there will be different combinations of width and speed
according to the number of lanes. For example, for a 20G netdev whose number
of lanes is 2, the width and speed displayed in ibv_devinfo will be:
	active_width: 2X
	active_speed: 10 Gbps

200G:
Before this patch, netdevs with netdev_speed more than 40G cannot get a right
width and speed in ibv_devinfo. Only the default result would be displayed:
	active_width: 4X
	active_speed: 25 Gbps
After this patch, taking an example with 4 lanes, the displayed results will be:
	active_width: 4X
	active_speed: 50 Gbps

>>
>> This patch retrieves netdev lanes and speed from net_device and
>> translates them to IB width and speed. Also, add a generic function
>> to translating netdev speed to IB speed.
>>
>> Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
>> Signed-off-by: Luoyouming <luoyouming@huawei.com>
>> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
>> ---
>>  drivers/infiniband/core/verbs.c | 19 +++++++++++++++++--
>>  include/rdma/ib_verbs.h         | 26 ++++++++++++++++++++++++++
>>  2 files changed, 43 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
>> index b99b3cc283b6..35f1b670600a 100644
>> --- a/drivers/infiniband/core/verbs.c
>> +++ b/drivers/infiniband/core/verbs.c
>> @@ -1880,11 +1880,19 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
>>  }
>>  EXPORT_SYMBOL(ib_modify_qp_with_udata);
>>  
>> +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
>> +				   u16 *speed, u8 *width)
>> +{
>> +	*width = ib_int_to_ib_width(lanes);
>> +	*speed = ib_eth_to_ib_speed(netdev_speed / lanes);
>> +}
>> +
>>  int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>>  {
>>  	int rc;
>>  	u32 netdev_speed;
>>  	struct net_device *netdev;
>> +	bool cap_link_lanes_supported;
>>  	struct ethtool_link_ksettings lksettings;
>>  
>>  	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
>> @@ -1896,16 +1904,23 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>>  
>>  	rtnl_lock();
>>  	rc = __ethtool_get_link_ksettings(netdev, &lksettings);
>> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
>>  	rtnl_unlock();
>>  
>>  	dev_put(netdev);
>>  
>>  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
>>  		netdev_speed = lksettings.base.speed;
>> +		if (cap_link_lanes_supported && lksettings.lanes) {
> 
> According to the documentation cap_link_lanes_supported defines if
> number of lanes can be supplied by user and I would expect from
> __ethtool_get_link_ksettings() to get right numbers after it was
> changed.
> 
> Thanks
> 

I'm sorry but I didn't quite understand. Do you mean the critical section of
rtnl_lock() here should be expanded to make sure getting the right number of
lanes?

Junxian

>> +			ib_get_width_and_speed(netdev_speed, lksettings.lanes,
>> +					       speed, width);
>> +			return 0;
>> +		}
>>  	} else {
>>  		netdev_speed = SPEED_1000;
>> -		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
>> -			netdev_speed);
>> +		if (rc)
>> +			pr_warn("%s speed is unknown, defaulting to %u\n",
>> +				netdev->name, netdev_speed);
>>  	}
>>  
>>  	if (netdev_speed <= SPEED_1000) {
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index 1e7774ac808f..7dc926ec7fee 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -552,6 +552,18 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
>>  	}
>>  }
>>  
>> +static inline int ib_int_to_ib_width(u32 lanes)
>> +{
>> +	switch (lanes) {
>> +	case 1: return IB_WIDTH_1X;
>> +	case 2: return IB_WIDTH_2X;
>> +	case 4: return IB_WIDTH_4X;
>> +	case 8: return IB_WIDTH_8X;
>> +	case 12: return IB_WIDTH_12X;
>> +	default: return IB_WIDTH_1X;
>> +	}
>> +}
>> +
>>  enum ib_port_speed {
>>  	IB_SPEED_SDR	= 1,
>>  	IB_SPEED_DDR	= 2,
>> @@ -563,6 +575,20 @@ enum ib_port_speed {
>>  	IB_SPEED_NDR	= 128,
>>  };
>>  
>> +static inline int ib_eth_to_ib_speed(u32 speed)
>> +{
>> +	switch (speed) {
>> +	case SPEED_2500: return IB_SPEED_SDR;
>> +	case SPEED_5000: return IB_SPEED_DDR;
>> +	case SPEED_10000: return IB_SPEED_FDR10;
>> +	case SPEED_14000: return IB_SPEED_FDR;
>> +	case SPEED_25000: return IB_SPEED_EDR;
>> +	case SPEED_50000: return IB_SPEED_HDR;
>> +	case SPEED_100000: return IB_SPEED_NDR;
>> +	default: return IB_SPEED_SDR;
>> +	}
>> +}
>> +
>>  enum ib_stat_flag {
>>  	IB_STAT_FLAG_OPTIONAL = 1 << 0,
>>  };
>> -- 
>> 2.30.0
>>
Mark Zhang June 19, 2023, 9:15 a.m. UTC | #3
On 6/19/2023 2:20 PM, Junxian Huang wrote:
> 
> 
> On 2023/6/12 1:46, Leon Romanovsky wrote:
>> On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
>>> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>>
>>> Previously, there was no way to query the number of lanes for a network
>>> card, so the same netdev_speed would result in a fixed pair of width and
>>> speed. As network card specifications become more diverse, such fixed
>>> mode is no longer suitable, so a method is needed to obtain the correct
>>> width and speed based on the number of lanes.
>>
>> I'm sorry but I didn't understand the problem statement. Can you please
>> provide an example of configuration that will give different results
>> before this patch and after?
>>
> 
> I'll give examples with 20G and 200G netdevs respectively.
> 
> 20G:
> Before this patch, regardless of the actual number of lanes, the width and
> speed displayed in ibv_devinfo would be always fixed:
> 	active_width: 4X
> 	active_speed: 5 Gbps
> After this patch, there will be different combinations of width and speed
> according to the number of lanes. For example, for a 20G netdev whose number
> of lanes is 2, the width and speed displayed in ibv_devinfo will be:
> 	active_width: 2X
> 	active_speed: 10 Gbps
> 
> 200G:
> Before this patch, netdevs with netdev_speed more than 40G cannot get a right
> width and speed in ibv_devinfo. Only the default result would be displayed:
> 	active_width: 4X
> 	active_speed: 25 Gbps
> After this patch, taking an example with 4 lanes, the displayed results will be:
> 	active_width: 4X
> 	active_speed: 50 Gbps

Can we use ib_query_port() instead of __ethtool_get_link_ksettings()?
   width = attr.active_width;
   speed = ib_speed_enum_to_int(attr.active_speed) *
	  ib_width_enum_to_int(attr.active_width);


>>>
>>> This patch retrieves netdev lanes and speed from net_device and
>>> translates them to IB width and speed. Also, add a generic function
>>> to translating netdev speed to IB speed.
>>>
>>> Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>> Signed-off-by: Luoyouming <luoyouming@huawei.com>
>>> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
>>> ---
>>>   drivers/infiniband/core/verbs.c | 19 +++++++++++++++++--
>>>   include/rdma/ib_verbs.h         | 26 ++++++++++++++++++++++++++
>>>   2 files changed, 43 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
>>> index b99b3cc283b6..35f1b670600a 100644
>>> --- a/drivers/infiniband/core/verbs.c
>>> +++ b/drivers/infiniband/core/verbs.c
>>> @@ -1880,11 +1880,19 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
>>>   }
>>>   EXPORT_SYMBOL(ib_modify_qp_with_udata);
>>>   
>>> +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
>>> +				   u16 *speed, u8 *width)
>>> +{
>>> +	*width = ib_int_to_ib_width(lanes);
>>> +	*speed = ib_eth_to_ib_speed(netdev_speed / lanes);
>>> +}
>>> +
>>>   int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>>>   {
>>>   	int rc;
>>>   	u32 netdev_speed;
>>>   	struct net_device *netdev;
>>> +	bool cap_link_lanes_supported;
>>>   	struct ethtool_link_ksettings lksettings;
>>>   
>>>   	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
>>> @@ -1896,16 +1904,23 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>>>   
>>>   	rtnl_lock();
>>>   	rc = __ethtool_get_link_ksettings(netdev, &lksettings);
>>> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
>>>   	rtnl_unlock();
>>>   
>>>   	dev_put(netdev);
>>>   
>>>   	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
>>>   		netdev_speed = lksettings.base.speed;
>>> +		if (cap_link_lanes_supported && lksettings.lanes) {
>>
>> According to the documentation cap_link_lanes_supported defines if
>> number of lanes can be supplied by user and I would expect from
>> __ethtool_get_link_ksettings() to get right numbers after it was
>> changed.
>>
>> Thanks
>>
> 
> I'm sorry but I didn't quite understand. Do you mean the critical section of
> rtnl_lock() here should be expanded to make sure getting the right number of
> lanes?
> 
> Junxian
> 
>>> +			ib_get_width_and_speed(netdev_speed, lksettings.lanes,
>>> +					       speed, width);
>>> +			return 0;
>>> +		}
>>>   	} else {
>>>   		netdev_speed = SPEED_1000;
>>> -		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
>>> -			netdev_speed);
>>> +		if (rc)
>>> +			pr_warn("%s speed is unknown, defaulting to %u\n",
>>> +				netdev->name, netdev_speed);
>>>   	}
>>>   
>>>   	if (netdev_speed <= SPEED_1000) {
>>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>>> index 1e7774ac808f..7dc926ec7fee 100644
>>> --- a/include/rdma/ib_verbs.h
>>> +++ b/include/rdma/ib_verbs.h
>>> @@ -552,6 +552,18 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
>>>   	}
>>>   }
>>>   
>>> +static inline int ib_int_to_ib_width(u32 lanes)
>>> +{
>>> +	switch (lanes) {
>>> +	case 1: return IB_WIDTH_1X;
>>> +	case 2: return IB_WIDTH_2X;
>>> +	case 4: return IB_WIDTH_4X;
>>> +	case 8: return IB_WIDTH_8X;
>>> +	case 12: return IB_WIDTH_12X;
>>> +	default: return IB_WIDTH_1X;
>>> +	}
>>> +}
>>> +
>>>   enum ib_port_speed {
>>>   	IB_SPEED_SDR	= 1,
>>>   	IB_SPEED_DDR	= 2,
>>> @@ -563,6 +575,20 @@ enum ib_port_speed {
>>>   	IB_SPEED_NDR	= 128,
>>>   };
>>>   
>>> +static inline int ib_eth_to_ib_speed(u32 speed)
>>> +{
>>> +	switch (speed) {
>>> +	case SPEED_2500: return IB_SPEED_SDR;
>>> +	case SPEED_5000: return IB_SPEED_DDR;
>>> +	case SPEED_10000: return IB_SPEED_FDR10;
>>> +	case SPEED_14000: return IB_SPEED_FDR;
>>> +	case SPEED_25000: return IB_SPEED_EDR;
>>> +	case SPEED_50000: return IB_SPEED_HDR;
>>> +	case SPEED_100000: return IB_SPEED_NDR;
>>> +	default: return IB_SPEED_SDR;
>>> +	}
>>> +}
>>> +
>>>   enum ib_stat_flag {
>>>   	IB_STAT_FLAG_OPTIONAL = 1 << 0,
>>>   };
>>> -- 
>>> 2.30.0
>>>
Junxian Huang June 19, 2023, 9:44 a.m. UTC | #4
On 2023/6/19 17:15, Mark Zhang wrote:
> On 6/19/2023 2:20 PM, Junxian Huang wrote:
>>
>>
>> On 2023/6/12 1:46, Leon Romanovsky wrote:
>>> On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
>>>> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>>>
>>>> Previously, there was no way to query the number of lanes for a network
>>>> card, so the same netdev_speed would result in a fixed pair of width and
>>>> speed. As network card specifications become more diverse, such fixed
>>>> mode is no longer suitable, so a method is needed to obtain the correct
>>>> width and speed based on the number of lanes.
>>>
>>> I'm sorry but I didn't understand the problem statement. Can you please
>>> provide an example of configuration that will give different results
>>> before this patch and after?
>>>
>>
>> I'll give examples with 20G and 200G netdevs respectively.
>>
>> 20G:
>> Before this patch, regardless of the actual number of lanes, the width and
>> speed displayed in ibv_devinfo would be always fixed:
>>     active_width: 4X
>>     active_speed: 5 Gbps
>> After this patch, there will be different combinations of width and speed
>> according to the number of lanes. For example, for a 20G netdev whose number
>> of lanes is 2, the width and speed displayed in ibv_devinfo will be:
>>     active_width: 2X
>>     active_speed: 10 Gbps
>>
>> 200G:
>> Before this patch, netdevs with netdev_speed more than 40G cannot get a right
>> width and speed in ibv_devinfo. Only the default result would be displayed:
>>     active_width: 4X
>>     active_speed: 25 Gbps
>> After this patch, taking an example with 4 lanes, the displayed results will be:
>>     active_width: 4X
>>     active_speed: 50 Gbps
> 
> Can we use ib_query_port() instead of __ethtool_get_link_ksettings()?
>   width = attr.active_width;
>   speed = ib_speed_enum_to_int(attr.active_speed) *
>       ib_width_enum_to_int(attr.active_width);
> 
> 

I don't think so. Actually, active_width and active_speed in ib_query_port() are
ultimately derived from ib_get_eth_speed() in many vendors' driver.

Junxian

>>>>
>>>> This patch retrieves netdev lanes and speed from net_device and
>>>> translates them to IB width and speed. Also, add a generic function
>>>> to translating netdev speed to IB speed.
>>>>
>>>> Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>>> Signed-off-by: Luoyouming <luoyouming@huawei.com>
>>>> Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
>>>> ---
>>>>   drivers/infiniband/core/verbs.c | 19 +++++++++++++++++--
>>>>   include/rdma/ib_verbs.h         | 26 ++++++++++++++++++++++++++
>>>>   2 files changed, 43 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
>>>> index b99b3cc283b6..35f1b670600a 100644
>>>> --- a/drivers/infiniband/core/verbs.c
>>>> +++ b/drivers/infiniband/core/verbs.c
>>>> @@ -1880,11 +1880,19 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
>>>>   }
>>>>   EXPORT_SYMBOL(ib_modify_qp_with_udata);
>>>>   +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
>>>> +                   u16 *speed, u8 *width)
>>>> +{
>>>> +    *width = ib_int_to_ib_width(lanes);
>>>> +    *speed = ib_eth_to_ib_speed(netdev_speed / lanes);
>>>> +}
>>>> +
>>>>   int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>>>>   {
>>>>       int rc;
>>>>       u32 netdev_speed;
>>>>       struct net_device *netdev;
>>>> +    bool cap_link_lanes_supported;
>>>>       struct ethtool_link_ksettings lksettings;
>>>>         if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
>>>> @@ -1896,16 +1904,23 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
>>>>         rtnl_lock();
>>>>       rc = __ethtool_get_link_ksettings(netdev, &lksettings);
>>>> +    cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
>>>>       rtnl_unlock();
>>>>         dev_put(netdev);
>>>>         if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
>>>>           netdev_speed = lksettings.base.speed;
>>>> +        if (cap_link_lanes_supported && lksettings.lanes) {
>>>
>>> According to the documentation cap_link_lanes_supported defines if
>>> number of lanes can be supplied by user and I would expect from
>>> __ethtool_get_link_ksettings() to get right numbers after it was
>>> changed.
>>>
>>> Thanks
>>>
>>
>> I'm sorry but I didn't quite understand. Do you mean the critical section of
>> rtnl_lock() here should be expanded to make sure getting the right number of
>> lanes?
>>
>> Junxian
>>
>>>> +            ib_get_width_and_speed(netdev_speed, lksettings.lanes,
>>>> +                           speed, width);
>>>> +            return 0;
>>>> +        }
>>>>       } else {
>>>>           netdev_speed = SPEED_1000;
>>>> -        pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
>>>> -            netdev_speed);
>>>> +        if (rc)
>>>> +            pr_warn("%s speed is unknown, defaulting to %u\n",
>>>> +                netdev->name, netdev_speed);
>>>>       }
>>>>         if (netdev_speed <= SPEED_1000) {
>>>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>>>> index 1e7774ac808f..7dc926ec7fee 100644
>>>> --- a/include/rdma/ib_verbs.h
>>>> +++ b/include/rdma/ib_verbs.h
>>>> @@ -552,6 +552,18 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
>>>>       }
>>>>   }
>>>>   +static inline int ib_int_to_ib_width(u32 lanes)
>>>> +{
>>>> +    switch (lanes) {
>>>> +    case 1: return IB_WIDTH_1X;
>>>> +    case 2: return IB_WIDTH_2X;
>>>> +    case 4: return IB_WIDTH_4X;
>>>> +    case 8: return IB_WIDTH_8X;
>>>> +    case 12: return IB_WIDTH_12X;
>>>> +    default: return IB_WIDTH_1X;
>>>> +    }
>>>> +}
>>>> +
>>>>   enum ib_port_speed {
>>>>       IB_SPEED_SDR    = 1,
>>>>       IB_SPEED_DDR    = 2,
>>>> @@ -563,6 +575,20 @@ enum ib_port_speed {
>>>>       IB_SPEED_NDR    = 128,
>>>>   };
>>>>   +static inline int ib_eth_to_ib_speed(u32 speed)
>>>> +{
>>>> +    switch (speed) {
>>>> +    case SPEED_2500: return IB_SPEED_SDR;
>>>> +    case SPEED_5000: return IB_SPEED_DDR;
>>>> +    case SPEED_10000: return IB_SPEED_FDR10;
>>>> +    case SPEED_14000: return IB_SPEED_FDR;
>>>> +    case SPEED_25000: return IB_SPEED_EDR;
>>>> +    case SPEED_50000: return IB_SPEED_HDR;
>>>> +    case SPEED_100000: return IB_SPEED_NDR;
>>>> +    default: return IB_SPEED_SDR;
>>>> +    }
>>>> +}
>>>> +
>>>>   enum ib_stat_flag {
>>>>       IB_STAT_FLAG_OPTIONAL = 1 << 0,
>>>>   };
>>>> -- 
>>>> 2.30.0
>>>>
>
Leon Romanovsky June 28, 2023, 5 a.m. UTC | #5
On Mon, Jun 19, 2023 at 02:20:54PM +0800, Junxian Huang wrote:
> 
> 
> On 2023/6/12 1:46, Leon Romanovsky wrote:
> > On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
> >> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
> >>
> >> Previously, there was no way to query the number of lanes for a network
> >> card, so the same netdev_speed would result in a fixed pair of width and
> >> speed. As network card specifications become more diverse, such fixed
> >> mode is no longer suitable, so a method is needed to obtain the correct
> >> width and speed based on the number of lanes.
> > 
> > I'm sorry but I didn't understand the problem statement. Can you please
> > provide an example of configuration that will give different results 
> > before this patch and after?
> > 
> 
> I'll give examples with 20G and 200G netdevs respectively.
> 
> 20G:
> Before this patch, regardless of the actual number of lanes, the width and
> speed displayed in ibv_devinfo would be always fixed:
> 	active_width: 4X
> 	active_speed: 5 Gbps
> After this patch, there will be different combinations of width and speed
> according to the number of lanes. For example, for a 20G netdev whose number
> of lanes is 2, the width and speed displayed in ibv_devinfo will be:
> 	active_width: 2X
> 	active_speed: 10 Gbps
> 
> 200G:
> Before this patch, netdevs with netdev_speed more than 40G cannot get a right
> width and speed in ibv_devinfo. Only the default result would be displayed:
> 	active_width: 4X
> 	active_speed: 25 Gbps
> After this patch, taking an example with 4 lanes, the displayed results will be:
> 	active_width: 4X
> 	active_speed: 50 Gbps
> 

<...>

> >> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
> >>  	rtnl_unlock();
> >>  
> >>  	dev_put(netdev);
> >>  
> >>  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
> >>  		netdev_speed = lksettings.base.speed;
> >> +		if (cap_link_lanes_supported && lksettings.lanes) {
> > 
> > According to the documentation cap_link_lanes_supported defines if
> > number of lanes can be supplied by user and I would expect from
> > __ethtool_get_link_ksettings() to get right numbers after it was
> > changed.

No, I'm saying that cap_link_lanes_supported is variable which only says
if number of lanes can be changed and __ethtool_get_link_ksettings()
will return right number of lanes every time it is called without need
to call to ib_get_width_and_speed() again.

Thanks

> > 
> > Thanks
> > 
> 
> I'm sorry but I didn't quite understand. Do you mean the critical section of
> rtnl_lock() here should be expanded to make sure getting the right number of
> lanes?
> 
> Junxian
> 
> >> +			ib_get_width_and_speed(netdev_speed, lksettings.lanes,
> >> +					       speed, width);
> >> +			return 0;
> >> +		}
> >>  	} else {
> >>  		netdev_speed = SPEED_1000;
> >> -		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
> >> -			netdev_speed);
> >> +		if (rc)
> >> +			pr_warn("%s speed is unknown, defaulting to %u\n",
> >> +				netdev->name, netdev_speed);
> >>  	}
> >>  
> >>  	if (netdev_speed <= SPEED_1000) {
> >> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> >> index 1e7774ac808f..7dc926ec7fee 100644
> >> --- a/include/rdma/ib_verbs.h
> >> +++ b/include/rdma/ib_verbs.h
> >> @@ -552,6 +552,18 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
> >>  	}
> >>  }
> >>  
> >> +static inline int ib_int_to_ib_width(u32 lanes)
> >> +{
> >> +	switch (lanes) {
> >> +	case 1: return IB_WIDTH_1X;
> >> +	case 2: return IB_WIDTH_2X;
> >> +	case 4: return IB_WIDTH_4X;
> >> +	case 8: return IB_WIDTH_8X;
> >> +	case 12: return IB_WIDTH_12X;
> >> +	default: return IB_WIDTH_1X;
> >> +	}
> >> +}
> >> +
> >>  enum ib_port_speed {
> >>  	IB_SPEED_SDR	= 1,
> >>  	IB_SPEED_DDR	= 2,
> >> @@ -563,6 +575,20 @@ enum ib_port_speed {
> >>  	IB_SPEED_NDR	= 128,
> >>  };
> >>  
> >> +static inline int ib_eth_to_ib_speed(u32 speed)
> >> +{
> >> +	switch (speed) {
> >> +	case SPEED_2500: return IB_SPEED_SDR;
> >> +	case SPEED_5000: return IB_SPEED_DDR;
> >> +	case SPEED_10000: return IB_SPEED_FDR10;
> >> +	case SPEED_14000: return IB_SPEED_FDR;
> >> +	case SPEED_25000: return IB_SPEED_EDR;
> >> +	case SPEED_50000: return IB_SPEED_HDR;
> >> +	case SPEED_100000: return IB_SPEED_NDR;
> >> +	default: return IB_SPEED_SDR;
> >> +	}
> >> +}
> >> +
> >>  enum ib_stat_flag {
> >>  	IB_STAT_FLAG_OPTIONAL = 1 << 0,
> >>  };
> >> -- 
> >> 2.30.0
> >>
Junxian Huang July 5, 2023, 3:05 a.m. UTC | #6
On 2023/6/28 13:00, Leon Romanovsky wrote:
> On Mon, Jun 19, 2023 at 02:20:54PM +0800, Junxian Huang wrote:
>>
>>
>> On 2023/6/12 1:46, Leon Romanovsky wrote:
>>> On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
>>>> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>>>
>>>> Previously, there was no way to query the number of lanes for a network
>>>> card, so the same netdev_speed would result in a fixed pair of width and
>>>> speed. As network card specifications become more diverse, such fixed
>>>> mode is no longer suitable, so a method is needed to obtain the correct
>>>> width and speed based on the number of lanes.
>>>
>>> I'm sorry but I didn't understand the problem statement. Can you please
>>> provide an example of configuration that will give different results 
>>> before this patch and after?
>>>
>>
>> I'll give examples with 20G and 200G netdevs respectively.
>>
>> 20G:
>> Before this patch, regardless of the actual number of lanes, the width and
>> speed displayed in ibv_devinfo would be always fixed:
>> 	active_width: 4X
>> 	active_speed: 5 Gbps
>> After this patch, there will be different combinations of width and speed
>> according to the number of lanes. For example, for a 20G netdev whose number
>> of lanes is 2, the width and speed displayed in ibv_devinfo will be:
>> 	active_width: 2X
>> 	active_speed: 10 Gbps
>>
>> 200G:
>> Before this patch, netdevs with netdev_speed more than 40G cannot get a right
>> width and speed in ibv_devinfo. Only the default result would be displayed:
>> 	active_width: 4X
>> 	active_speed: 25 Gbps
>> After this patch, taking an example with 4 lanes, the displayed results will be:
>> 	active_width: 4X
>> 	active_speed: 50 Gbps
>>
> 
> <...>
> 
>>>> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
>>>>  	rtnl_unlock();
>>>>  
>>>>  	dev_put(netdev);
>>>>  
>>>>  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
>>>>  		netdev_speed = lksettings.base.speed;
>>>> +		if (cap_link_lanes_supported && lksettings.lanes) {
>>>
>>> According to the documentation cap_link_lanes_supported defines if
>>> number of lanes can be supplied by user and I would expect from
>>> __ethtool_get_link_ksettings() to get right numbers after it was
>>> changed.
> 
> No, I'm saying that cap_link_lanes_supported is variable which only says
> if number of lanes can be changed and __ethtool_get_link_ksettings()
> will return right number of lanes every time it is called without need
> to call to ib_get_width_and_speed() again.
> 
> Thanks
> 

These two functions have different purposes.

The number of lanes is indeed obtained from __ethtool_get_link_ksettings(),
and ib_get_width_and_speed() converts the number of lanes into ib_width and
ib_speed, which are the output of ib_get_eth_speed().

Junxian

>>>
>>> Thanks
>>>
>>
>> I'm sorry but I didn't quite understand. Do you mean the critical section of
>> rtnl_lock() here should be expanded to make sure getting the right number of
>> lanes?
>>
>> Junxian
>>
>>>> +			ib_get_width_and_speed(netdev_speed, lksettings.lanes,
>>>> +					       speed, width);
>>>> +			return 0;
>>>> +		}
>>>>  	} else {
>>>>  		netdev_speed = SPEED_1000;
>>>> -		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
>>>> -			netdev_speed);
>>>> +		if (rc)
>>>> +			pr_warn("%s speed is unknown, defaulting to %u\n",
>>>> +				netdev->name, netdev_speed);
>>>>  	}
>>>>  
>>>>  	if (netdev_speed <= SPEED_1000) {
>>>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>>>> index 1e7774ac808f..7dc926ec7fee 100644
>>>> --- a/include/rdma/ib_verbs.h
>>>> +++ b/include/rdma/ib_verbs.h
>>>> @@ -552,6 +552,18 @@ static inline int ib_width_enum_to_int(enum ib_port_width width)
>>>>  	}
>>>>  }
>>>>  
>>>> +static inline int ib_int_to_ib_width(u32 lanes)
>>>> +{
>>>> +	switch (lanes) {
>>>> +	case 1: return IB_WIDTH_1X;
>>>> +	case 2: return IB_WIDTH_2X;
>>>> +	case 4: return IB_WIDTH_4X;
>>>> +	case 8: return IB_WIDTH_8X;
>>>> +	case 12: return IB_WIDTH_12X;
>>>> +	default: return IB_WIDTH_1X;
>>>> +	}
>>>> +}
>>>> +
>>>>  enum ib_port_speed {
>>>>  	IB_SPEED_SDR	= 1,
>>>>  	IB_SPEED_DDR	= 2,
>>>> @@ -563,6 +575,20 @@ enum ib_port_speed {
>>>>  	IB_SPEED_NDR	= 128,
>>>>  };
>>>>  
>>>> +static inline int ib_eth_to_ib_speed(u32 speed)
>>>> +{
>>>> +	switch (speed) {
>>>> +	case SPEED_2500: return IB_SPEED_SDR;
>>>> +	case SPEED_5000: return IB_SPEED_DDR;
>>>> +	case SPEED_10000: return IB_SPEED_FDR10;
>>>> +	case SPEED_14000: return IB_SPEED_FDR;
>>>> +	case SPEED_25000: return IB_SPEED_EDR;
>>>> +	case SPEED_50000: return IB_SPEED_HDR;
>>>> +	case SPEED_100000: return IB_SPEED_NDR;
>>>> +	default: return IB_SPEED_SDR;
>>>> +	}
>>>> +}
>>>> +
>>>>  enum ib_stat_flag {
>>>>  	IB_STAT_FLAG_OPTIONAL = 1 << 0,
>>>>  };
>>>> -- 
>>>> 2.30.0
>>>>
Leon Romanovsky July 5, 2023, 7:12 a.m. UTC | #7
On Wed, Jul 05, 2023 at 11:05:50AM +0800, Junxian Huang wrote:
> 
> 
> On 2023/6/28 13:00, Leon Romanovsky wrote:
> > On Mon, Jun 19, 2023 at 02:20:54PM +0800, Junxian Huang wrote:
> >>
> >>
> >> On 2023/6/12 1:46, Leon Romanovsky wrote:
> >>> On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
> >>>> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
> >>>>
> >>>> Previously, there was no way to query the number of lanes for a network
> >>>> card, so the same netdev_speed would result in a fixed pair of width and
> >>>> speed. As network card specifications become more diverse, such fixed
> >>>> mode is no longer suitable, so a method is needed to obtain the correct
> >>>> width and speed based on the number of lanes.
> >>>
> >>> I'm sorry but I didn't understand the problem statement. Can you please
> >>> provide an example of configuration that will give different results 
> >>> before this patch and after?
> >>>
> >>
> >> I'll give examples with 20G and 200G netdevs respectively.
> >>
> >> 20G:
> >> Before this patch, regardless of the actual number of lanes, the width and
> >> speed displayed in ibv_devinfo would be always fixed:
> >> 	active_width: 4X
> >> 	active_speed: 5 Gbps
> >> After this patch, there will be different combinations of width and speed
> >> according to the number of lanes. For example, for a 20G netdev whose number
> >> of lanes is 2, the width and speed displayed in ibv_devinfo will be:
> >> 	active_width: 2X
> >> 	active_speed: 10 Gbps
> >>
> >> 200G:
> >> Before this patch, netdevs with netdev_speed more than 40G cannot get a right
> >> width and speed in ibv_devinfo. Only the default result would be displayed:
> >> 	active_width: 4X
> >> 	active_speed: 25 Gbps
> >> After this patch, taking an example with 4 lanes, the displayed results will be:
> >> 	active_width: 4X
> >> 	active_speed: 50 Gbps
> >>
> > 
> > <...>
> > 
> >>>> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
> >>>>  	rtnl_unlock();
> >>>>  
> >>>>  	dev_put(netdev);
> >>>>  
> >>>>  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
> >>>>  		netdev_speed = lksettings.base.speed;
> >>>> +		if (cap_link_lanes_supported && lksettings.lanes) {
> >>>
> >>> According to the documentation cap_link_lanes_supported defines if
> >>> number of lanes can be supplied by user and I would expect from
> >>> __ethtool_get_link_ksettings() to get right numbers after it was
> >>> changed.
> > 
> > No, I'm saying that cap_link_lanes_supported is variable which only says
> > if number of lanes can be changed and __ethtool_get_link_ksettings()
> > will return right number of lanes every time it is called without need
> > to call to ib_get_width_and_speed() again.
> > 
> > Thanks
> > 
> 
> These two functions have different purposes.
> 
> The number of lanes is indeed obtained from __ethtool_get_link_ksettings(),
> and ib_get_width_and_speed() converts the number of lanes into ib_width and
> ib_speed, which are the output of ib_get_eth_speed().

Great, so why do you need to rely on cap_link_lanes_supported in ib_get_width_and_speed()?

Thanks
Junxian Huang July 6, 2023, 6:39 a.m. UTC | #8
On 2023/7/5 15:12, Leon Romanovsky wrote:
> On Wed, Jul 05, 2023 at 11:05:50AM +0800, Junxian Huang wrote:
>>
>>
>> On 2023/6/28 13:00, Leon Romanovsky wrote:
>>> On Mon, Jun 19, 2023 at 02:20:54PM +0800, Junxian Huang wrote:
>>>>
>>>>
>>>> On 2023/6/12 1:46, Leon Romanovsky wrote:
>>>>> On Sat, Jun 03, 2023 at 02:38:33PM +0800, Junxian Huang wrote:
>>>>>> From: Haoyue Xu <xuhaoyue1@hisilicon.com>
>>>>>>
>>>>>> Previously, there was no way to query the number of lanes for a network
>>>>>> card, so the same netdev_speed would result in a fixed pair of width and
>>>>>> speed. As network card specifications become more diverse, such fixed
>>>>>> mode is no longer suitable, so a method is needed to obtain the correct
>>>>>> width and speed based on the number of lanes.
>>>>>
>>>>> I'm sorry but I didn't understand the problem statement. Can you please
>>>>> provide an example of configuration that will give different results 
>>>>> before this patch and after?
>>>>>
>>>>
>>>> I'll give examples with 20G and 200G netdevs respectively.
>>>>
>>>> 20G:
>>>> Before this patch, regardless of the actual number of lanes, the width and
>>>> speed displayed in ibv_devinfo would be always fixed:
>>>> 	active_width: 4X
>>>> 	active_speed: 5 Gbps
>>>> After this patch, there will be different combinations of width and speed
>>>> according to the number of lanes. For example, for a 20G netdev whose number
>>>> of lanes is 2, the width and speed displayed in ibv_devinfo will be:
>>>> 	active_width: 2X
>>>> 	active_speed: 10 Gbps
>>>>
>>>> 200G:
>>>> Before this patch, netdevs with netdev_speed more than 40G cannot get a right
>>>> width and speed in ibv_devinfo. Only the default result would be displayed:
>>>> 	active_width: 4X
>>>> 	active_speed: 25 Gbps
>>>> After this patch, taking an example with 4 lanes, the displayed results will be:
>>>> 	active_width: 4X
>>>> 	active_speed: 50 Gbps
>>>>
>>>
>>> <...>
>>>
>>>>>> +	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
>>>>>>  	rtnl_unlock();
>>>>>>  
>>>>>>  	dev_put(netdev);
>>>>>>  
>>>>>>  	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
>>>>>>  		netdev_speed = lksettings.base.speed;
>>>>>> +		if (cap_link_lanes_supported && lksettings.lanes) {
>>>>>
>>>>> According to the documentation cap_link_lanes_supported defines if
>>>>> number of lanes can be supplied by user and I would expect from
>>>>> __ethtool_get_link_ksettings() to get right numbers after it was
>>>>> changed.
>>>
>>> No, I'm saying that cap_link_lanes_supported is variable which only says
>>> if number of lanes can be changed and __ethtool_get_link_ksettings()
>>> will return right number of lanes every time it is called without need
>>> to call to ib_get_width_and_speed() again.
>>>
>>> Thanks
>>>
>>
>> These two functions have different purposes.
>>
>> The number of lanes is indeed obtained from __ethtool_get_link_ksettings(),
>> and ib_get_width_and_speed() converts the number of lanes into ib_width and
>> ib_speed, which are the output of ib_get_eth_speed().
> 
> Great, so why do you need to rely on cap_link_lanes_supported in ib_get_width_and_speed()?
> 
> Thanks

Ah, I see what you mean. If cap_link_lanes_supported is false, lksettings.lanes will become 0,
and ib_get_width_and_speed() won't be called. Therefore, cap_link_lanes_supported is redundant.

I'll fix it in v3. Thanks.

Junxian
diff mbox series

Patch

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b99b3cc283b6..35f1b670600a 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1880,11 +1880,19 @@  int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
 }
 EXPORT_SYMBOL(ib_modify_qp_with_udata);
 
+static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
+				   u16 *speed, u8 *width)
+{
+	*width = ib_int_to_ib_width(lanes);
+	*speed = ib_eth_to_ib_speed(netdev_speed / lanes);
+}
+
 int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
 {
 	int rc;
 	u32 netdev_speed;
 	struct net_device *netdev;
+	bool cap_link_lanes_supported;
 	struct ethtool_link_ksettings lksettings;
 
 	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
@@ -1896,16 +1904,23 @@  int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
 
 	rtnl_lock();
 	rc = __ethtool_get_link_ksettings(netdev, &lksettings);
+	cap_link_lanes_supported = netdev->ethtool_ops->cap_link_lanes_supported;
 	rtnl_unlock();
 
 	dev_put(netdev);
 
 	if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
 		netdev_speed = lksettings.base.speed;
+		if (cap_link_lanes_supported && lksettings.lanes) {
+			ib_get_width_and_speed(netdev_speed, lksettings.lanes,
+					       speed, width);
+			return 0;
+		}
 	} else {
 		netdev_speed = SPEED_1000;
-		pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
-			netdev_speed);
+		if (rc)
+			pr_warn("%s speed is unknown, defaulting to %u\n",
+				netdev->name, netdev_speed);
 	}
 
 	if (netdev_speed <= SPEED_1000) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1e7774ac808f..7dc926ec7fee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -552,6 +552,18 @@  static inline int ib_width_enum_to_int(enum ib_port_width width)
 	}
 }
 
+static inline int ib_int_to_ib_width(u32 lanes)
+{
+	switch (lanes) {
+	case 1: return IB_WIDTH_1X;
+	case 2: return IB_WIDTH_2X;
+	case 4: return IB_WIDTH_4X;
+	case 8: return IB_WIDTH_8X;
+	case 12: return IB_WIDTH_12X;
+	default: return IB_WIDTH_1X;
+	}
+}
+
 enum ib_port_speed {
 	IB_SPEED_SDR	= 1,
 	IB_SPEED_DDR	= 2,
@@ -563,6 +575,20 @@  enum ib_port_speed {
 	IB_SPEED_NDR	= 128,
 };
 
+static inline int ib_eth_to_ib_speed(u32 speed)
+{
+	switch (speed) {
+	case SPEED_2500: return IB_SPEED_SDR;
+	case SPEED_5000: return IB_SPEED_DDR;
+	case SPEED_10000: return IB_SPEED_FDR10;
+	case SPEED_14000: return IB_SPEED_FDR;
+	case SPEED_25000: return IB_SPEED_EDR;
+	case SPEED_50000: return IB_SPEED_HDR;
+	case SPEED_100000: return IB_SPEED_NDR;
+	default: return IB_SPEED_SDR;
+	}
+}
+
 enum ib_stat_flag {
 	IB_STAT_FLAG_OPTIONAL = 1 << 0,
 };