diff mbox series

[net-next] mlx5: count all link events

Message ID 20210519171825.600110-1-kuba@kernel.org (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series [net-next] mlx5: count all link events | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 5 maintainers not CCed: jiri@resnulli.us leon@kernel.org jhs@mojatatu.com xiyou.wangcong@gmail.com linux-rdma@vger.kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 5129 this patch: 5129
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 50 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 5194 this patch: 5194
netdev/header_inline success Link

Commit Message

Jakub Kicinski May 19, 2021, 5:18 p.m. UTC
mlx5 devices were observed generating MLX5_PORT_CHANGE_SUBTYPE_ACTIVE
events without an intervening MLX5_PORT_CHANGE_SUBTYPE_DOWN. This
breaks link flap detection based on Linux carrier state transition
count as netif_carrier_on() does nothing if carrier is already on.
Make sure we count such events.

netif_carrier_event() increments the counters and fires the linkwatch
events. The latter is not necessary for the use case but seems like
the right thing to do.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c  |  6 +++++-
 include/linux/netdevice.h                      |  2 +-
 net/sched/sch_generic.c                        | 18 ++++++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

Comments

Lijun Pan May 19, 2021, 7:34 p.m. UTC | #1
> On May 19, 2021, at 12:18 PM, Jakub Kicinski <kuba@kernel.org> wrote:
> 
> mlx5 devices were observed generating MLX5_PORT_CHANGE_SUBTYPE_ACTIVE
> events without an intervening MLX5_PORT_CHANGE_SUBTYPE_DOWN. This
> breaks link flap detection based on Linux carrier state transition
> count as netif_carrier_on() does nothing if carrier is already on.
> Make sure we count such events.
> 
> netif_carrier_event() increments the counters and fires the linkwatch
> events. The latter is not necessary for the use case but seems like
> the right thing to do.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> .../net/ethernet/mellanox/mlx5/core/en_main.c  |  6 +++++-
> include/linux/netdevice.h                      |  2 +-
> net/sched/sch_generic.c                        | 18 ++++++++++++++++++
> 3 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index bca832cdc4cb..5a67ebc0c96c 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -91,12 +91,16 @@ void mlx5e_update_carrier(struct mlx5e_priv *priv)
> {
> 	struct mlx5_core_dev *mdev = priv->mdev;
> 	u8 port_state;
> +	bool up;
> 
> 	port_state = mlx5_query_vport_state(mdev,
> 					    MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
> 					    0);
> 
> -	if (port_state == VPORT_STATE_UP) {
> +	up = port_state == VPORT_STATE_UP;
> +	if (up == netif_carrier_ok(priv->netdev))
> +		netif_carrier_event(priv->netdev);
> +	if (up) {
> 		netdev_info(priv->netdev, "Link up\n");
> 		netif_carrier_on(priv->netdev);
> 	} else {
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 5cbc950b34df..be1dcceda5e4 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -4187,8 +4187,8 @@ unsigned long dev_trans_start(struct net_device *dev);
> void __netdev_watchdog_up(struct net_device *dev);
> 
> void netif_carrier_on(struct net_device *dev);
> -
> void netif_carrier_off(struct net_device *dev);
> +void netif_carrier_event(struct net_device *dev);
> 
> /**
>  *	netif_dormant_on - mark device as dormant.
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 44991ea726fc..3090ae32307b 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -515,6 +515,24 @@ void netif_carrier_off(struct net_device *dev)
> }
> EXPORT_SYMBOL(netif_carrier_off);
> 
> +/**
> + *	netif_carrier_event - report carrier state event
> + *	@dev: network device
> + *
> + * Device has detected a carrier event but the carrier state wasn't changed.
> + * Use in drivers when querying carrier state asynchronously, to avoid missing
> + * events (link flaps) if link recovers before it's queried.
> + */
> +void netif_carrier_event(struct net_device *dev)
> +{
> +	if (dev->reg_state == NETREG_UNINITIALIZED)
> +		return;
> +	atomic_inc(&dev->carrier_up_count);
> +	atomic_inc(&dev->carrier_down_count);
> +	linkwatch_fire_event(dev);
> +}
> +EXPORT_SYMBOL_GPL(netif_carrier_event);

Is it possible to integrate netif_carrier_event into netif_carrier_on? like,


void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_up_count);
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	} else {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_down_count);
		atomic_inc(&dev->carrier_up_count);
	}
}
EXPORT_SYMBOL(netif_carrier_on);
Jakub Kicinski May 19, 2021, 7:51 p.m. UTC | #2
On Wed, 19 May 2021 14:34:34 -0500 Lijun Pan wrote:
> Is it possible to integrate netif_carrier_event into netif_carrier_on? like,
> 
> 
> void netif_carrier_on(struct net_device *dev)
> {
> 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
> 		if (dev->reg_state == NETREG_UNINITIALIZED)
> 			return;
> 		atomic_inc(&dev->carrier_up_count);
> 		linkwatch_fire_event(dev);
> 		if (netif_running(dev))
> 			__netdev_watchdog_up(dev);
> 	} else {
> 		if (dev->reg_state == NETREG_UNINITIALIZED)
> 			return;
> 		atomic_inc(&dev->carrier_down_count);
> 		atomic_inc(&dev->carrier_up_count);
> 	}
> }
> EXPORT_SYMBOL(netif_carrier_on);

Ah, I meant to address that in the commit message, thanks for bringing
this up. I suspect drivers may depend on the current behavior of
netif_carrier_on()/off() being idempotent. We have no real reason for
removing that assumption.

I assumed netif_carrier_event() would be used specifically in places
driver is actually servicing a link event from the device, and
therefore is relatively certain that _something_ has happened.
Saeed Mahameed May 19, 2021, 8:18 p.m. UTC | #3
On Wed, 2021-05-19 at 12:51 -0700, Jakub Kicinski wrote:
> On Wed, 19 May 2021 14:34:34 -0500 Lijun Pan wrote:
> > Is it possible to integrate netif_carrier_event into
> > netif_carrier_on? like,
> > 
> > 
> > void netif_carrier_on(struct net_device *dev)
> > {
> >         if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
> > {
> >                 if (dev->reg_state == NETREG_UNINITIALIZED)
> >                         return;
> >                 atomic_inc(&dev->carrier_up_count);
> >                 linkwatch_fire_event(dev);
> >                 if (netif_running(dev))
> >                         __netdev_watchdog_up(dev);
> >         } else {
> >                 if (dev->reg_state == NETREG_UNINITIALIZED)
> >                         return;
> >                 atomic_inc(&dev->carrier_down_count);
> >                 atomic_inc(&dev->carrier_up_count);
> >         }
> > }
> > EXPORT_SYMBOL(netif_carrier_on);
> 
> Ah, I meant to address that in the commit message, thanks for bringing
> this up. I suspect drivers may depend on the current behavior of
> netif_carrier_on()/off() being idempotent. We have no real reason for
> removing that assumption.
> 
> I assumed netif_carrier_event() would be used specifically in places
> driver is actually servicing a link event from the device, and
> therefore is relatively certain that _something_ has happened.

then according to the above assumption it is safe to make
netif_carrier_event() do everything.

netif_carrier_event(netdev, up) {
	if (dev->reg_state == NETREG_UNINITIALIZED)
		return;

	if (up == netif_carrier_ok(netdev) {
		atomic_inc(&netdev->carrier_up_count);
		atomic_inc(&netdev->carrier_down_count);
		linkwatch_fire_event(netdev);
	}

	if (up) {
		netdev_info(netdev, "Link up\n");
		netif_carrier_on(netdev);
	} else {
		netdev_info(netdev, "Link down\n");
		netif_carrier_off(netdev);
	}
}
Saeed Mahameed May 19, 2021, 8:49 p.m. UTC | #4
On Wed, 2021-05-19 at 10:18 -0700, Jakub Kicinski wrote:
> mlx5 devices were observed generating MLX5_PORT_CHANGE_SUBTYPE_ACTIVE
> events without an intervening MLX5_PORT_CHANGE_SUBTYPE_DOWN. This
> breaks link flap detection based on Linux carrier state transition
> count as netif_carrier_on() does nothing if carrier is already on.
> Make sure we count such events.
> 

Can you share more on the actual scenario that has happened ? 
in mlx5 i know of situations where fw might generate such events, just
as FYI for virtual ports (vports) on some configuration changes.

another explanation is that in the driver we explicitly query the link
state and we never take the event value, so it could have been that the
link flapped so fast we missed the intermediate state.

According to HW spec for some reason we should always query and not
rely on the event. 

<quote>
If software retrieves this indication (port state change event), this
signifies that the state has been
changed and a QUERY_VPORT_STATE command should be performed to get the
new state.
</quote>

> netif_carrier_event() increments the counters and fires the linkwatch
> events. The latter is not necessary for the use case but seems like
> the right thing to do.
>
Jakub Kicinski May 19, 2021, 8:56 p.m. UTC | #5
On Wed, 19 May 2021 13:18:36 -0700 Saeed Mahameed wrote:
> On Wed, 2021-05-19 at 12:51 -0700, Jakub Kicinski wrote:
> > On Wed, 19 May 2021 14:34:34 -0500 Lijun Pan wrote:  
> > > Is it possible to integrate netif_carrier_event into
> > > netif_carrier_on? like,
> > > 
> > > void netif_carrier_on(struct net_device *dev)
> > > {
> > >         if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state))
> > > {
> > >                 if (dev->reg_state == NETREG_UNINITIALIZED)
> > >                         return;
> > >                 atomic_inc(&dev->carrier_up_count);
> > >                 linkwatch_fire_event(dev);
> > >                 if (netif_running(dev))
> > >                         __netdev_watchdog_up(dev);
> > >         } else {
> > >                 if (dev->reg_state == NETREG_UNINITIALIZED)
> > >                         return;
> > >                 atomic_inc(&dev->carrier_down_count);
> > >                 atomic_inc(&dev->carrier_up_count);
> > >         }
> > > }
> > > EXPORT_SYMBOL(netif_carrier_on);  
> > 
> > Ah, I meant to address that in the commit message, thanks for bringing
> > this up. I suspect drivers may depend on the current behavior of
> > netif_carrier_on()/off() being idempotent. We have no real reason for
> > removing that assumption.
> > 
> > I assumed netif_carrier_event() would be used specifically in places
> > driver is actually servicing a link event from the device, and
> > therefore is relatively certain that _something_ has happened.  
> 
> then according to the above assumption it is safe to make
> netif_carrier_event() do everything.
> 
> netif_carrier_event(netdev, up) {
> 	if (dev->reg_state == NETREG_UNINITIALIZED)
> 		return;
> 
> 	if (up == netif_carrier_ok(netdev) {
> 		atomic_inc(&netdev->carrier_up_count);
> 		atomic_inc(&netdev->carrier_down_count);
> 		linkwatch_fire_event(netdev);
> 	}
> 
> 	if (up) {
> 		netdev_info(netdev, "Link up\n");
> 		netif_carrier_on(netdev);
> 	} else {
> 		netdev_info(netdev, "Link down\n");
> 		netif_carrier_off(netdev);
> 	}
> }

Two things to consider are:
 - some drivers print more info than just "link up/link down" so they'd
   have to drop that extra stuff (as much as I'd like the consistency)
 - again with the unnecessary events I was afraid that drivers reuse 
   the same handler for device events and to read the state in which
   case we may do something like:

	if (from_event && up == netif_carrier_ok(netdev)

Maybe we can revisit when there's more users?
Jakub Kicinski May 19, 2021, 9:06 p.m. UTC | #6
On Wed, 19 May 2021 13:49:00 -0700 Saeed Mahameed wrote:
> On Wed, 2021-05-19 at 10:18 -0700, Jakub Kicinski wrote:
> > mlx5 devices were observed generating MLX5_PORT_CHANGE_SUBTYPE_ACTIVE
> > events without an intervening MLX5_PORT_CHANGE_SUBTYPE_DOWN. This
> > breaks link flap detection based on Linux carrier state transition
> > count as netif_carrier_on() does nothing if carrier is already on.
> > Make sure we count such events.
> 
> Can you share more on the actual scenario that has happened ? 
> in mlx5 i know of situations where fw might generate such events, just
> as FYI for virtual ports (vports) on some configuration changes.
> 
> another explanation is that in the driver we explicitly query the link
> state and we never take the event value, so it could have been that the
> link flapped so fast we missed the intermediate state.

The link flaps quite a bit, this is likely a bad cable or port.
I scanned the fleet a little bit more and I see a couple machines 
in such state, in each case the switch is also seeing the link flaps, 
not just the NIC. Without this patch the driver registers a full flap
once every ~15min, with the patch it's once a second. That's much
closer to what the switch registers.

Also the issue affects all hosts in MH, and persists across reboots
of a single host (hence I could test this patch).

> According to HW spec for some reason we should always query and not
> rely on the event. 
> 
> <quote>
> If software retrieves this indication (port state change event), this
> signifies that the state has been
> changed and a QUERY_VPORT_STATE command should be performed to get the
> new state.
> </quote>

I see, seems reasonable. I'm guessing the FW generates only one of the
events on minor type of faults? I don't think the link goes fully down,
because I can SSH to those machines, they just periodically drop
traffic. But the can't fully retrain the link at such high rate, 
I don't think.

> > netif_carrier_event() increments the counters and fires the linkwatch
> > events. The latter is not necessary for the use case but seems like
> > the right thing to do.
Saeed Mahameed May 20, 2021, 12:07 a.m. UTC | #7
On Wed, 2021-05-19 at 14:06 -0700, Jakub Kicinski wrote:
> On Wed, 19 May 2021 13:49:00 -0700 Saeed Mahameed wrote:
> > On Wed, 2021-05-19 at 10:18 -0700, Jakub Kicinski wrote:
> > > mlx5 devices were observed generating
> > > MLX5_PORT_CHANGE_SUBTYPE_ACTIVE
> > > events without an intervening MLX5_PORT_CHANGE_SUBTYPE_DOWN. This
> > > breaks link flap detection based on Linux carrier state
> > > transition
> > > count as netif_carrier_on() does nothing if carrier is already
> > > on.
> > > Make sure we count such events.
> > 
> > Can you share more on the actual scenario that has happened ? 
> > in mlx5 i know of situations where fw might generate such events,
> > just
> > as FYI for virtual ports (vports) on some configuration changes.
> > 
> > another explanation is that in the driver we explicitly query the
> > link
> > state and we never take the event value, so it could have been that
> > the
> > link flapped so fast we missed the intermediate state.
> 
> The link flaps quite a bit, this is likely a bad cable or port.
> I scanned the fleet a little bit more and I see a couple machines 
> in such state, in each case the switch is also seeing the link flaps,
> not just the NIC. Without this patch the driver registers a full flap
> once every ~15min, with the patch it's once a second. That's much
> closer to what the switch registers.
> 
> Also the issue affects all hosts in MH, and persists across reboots
> of a single host (hence I could test this patch).
> 

reproduces on reboots even with a good cable ? 
you reboot the peer machine or the DUT (under test) machine ?

> > According to HW spec for some reason we should always query and not
> > rely on the event. 
> > 
> > <quote>
> > If software retrieves this indication (port state change event),
> > this
> > signifies that the state has been
> > changed and a QUERY_VPORT_STATE command should be performed to get
> > the
> > new state.
> > </quote>
> 
> I see, seems reasonable. I'm guessing the FW generates only one of
> the
> events on minor type of faults? I don't think the link goes fully
> down,
> because I can SSH to those machines, they just periodically drop
> traffic. But the can't fully retrain the link at such high rate, 
> I don't think.
> 

hmm, Then i would like to get to the bottom of this, so i will have to
consult with FW.
But regardless, we can progress with the patch, I think the HW spec
description forces us to do so..
Jakub Kicinski May 20, 2021, 12:44 a.m. UTC | #8
On Wed, 19 May 2021 17:07:00 -0700 Saeed Mahameed wrote:
> On Wed, 2021-05-19 at 14:06 -0700, Jakub Kicinski wrote:
> > On Wed, 19 May 2021 13:49:00 -0700 Saeed Mahameed wrote:  
> > > Can you share more on the actual scenario that has happened ? 
> > > in mlx5 i know of situations where fw might generate such events,
> > > just
> > > as FYI for virtual ports (vports) on some configuration changes.
> > > 
> > > another explanation is that in the driver we explicitly query the
> > > link
> > > state and we never take the event value, so it could have been that
> > > the
> > > link flapped so fast we missed the intermediate state.  
> > 
> > The link flaps quite a bit, this is likely a bad cable or port.
> > I scanned the fleet a little bit more and I see a couple machines 
> > in such state, in each case the switch is also seeing the link flaps,
> > not just the NIC. Without this patch the driver registers a full flap
> > once every ~15min, with the patch it's once a second. That's much
> > closer to what the switch registers.
> > 
> > Also the issue affects all hosts in MH, and persists across reboots
> > of a single host (hence I could test this patch).
> 
> reproduces on reboots even with a good cable ? 

I don't have access to the machines so the cable stays the same. I was
just saying that it doesn't seem like a driver issue since it persists
across reboots.

> you reboot the peer machine or the DUT (under test) machine ?

DUT

> > > According to HW spec for some reason we should always query and not
> > > rely on the event. 
> > > 
> > > <quote>
> > > If software retrieves this indication (port state change event),
> > > this
> > > signifies that the state has been
> > > changed and a QUERY_VPORT_STATE command should be performed to get
> > > the
> > > new state.
> > > </quote>  
> > 
> > I see, seems reasonable. I'm guessing the FW generates only one of
> > the
> > events on minor type of faults? I don't think the link goes fully
> > down,
> > because I can SSH to those machines, they just periodically drop
> > traffic. But the can't fully retrain the link at such high rate, 
> > I don't think.
> >   
> 
> hmm, Then i would like to get to the bottom of this, so i will have to
> consult with FW.
> But regardless, we can progress with the patch, I think the HW spec
> description forces us to do so.. 

SGTM :)
Saeed Mahameed May 20, 2021, 5:36 a.m. UTC | #9
On Wed, 2021-05-19 at 13:56 -0700, Jakub Kicinski wrote:
> On Wed, 19 May 2021 13:18:36 -0700 Saeed Mahameed wrote:
> > On Wed, 2021-05-19 at 12:51 -0700, Jakub Kicinski wrote:
> > > 
> > > 
> > > I assumed netif_carrier_event() would be used specifically in
> > > places
> > > driver is actually servicing a link event from the device, and
> > > therefore is relatively certain that _something_ has happened.  
> > 
> > then according to the above assumption it is safe to make
> > netif_carrier_event() do everything.
> > 
> > netif_carrier_event(netdev, up) {
> >         if (dev->reg_state == NETREG_UNINITIALIZED)
> >                 return;
> > 
> >         if (up == netif_carrier_ok(netdev) {
> >                 atomic_inc(&netdev->carrier_up_count);
> >                 atomic_inc(&netdev->carrier_down_count);
> >                 linkwatch_fire_event(netdev);
> >         }
> > 
> >         if (up) {
> >                 netdev_info(netdev, "Link up\n");
> >                 netif_carrier_on(netdev);
> >         } else {
> >                 netdev_info(netdev, "Link down\n");
> >                 netif_carrier_off(netdev);
> >         }
> > }
> 
> Two things to consider are:
>  - some drivers print more info than just "link up/link down" so
> they'd
>    have to drop that extra stuff (as much as I'd like the
> consistency)

+1 for the consistency

>  - again with the unnecessary events I was afraid that drivers reuse 
>    the same handler for device events and to read the state in which
>    case we may do something like:
> 
>         if (from_event && up == netif_carrier_ok(netdev)
> 

I don't actually understand your point here .. what kind of scenarios
it is wrong to use this function ? 

But anyway, the name of the function makes it very clear this is from
event.. 
also we can document this.

> Maybe we can revisit when there's more users?
goes both ways :), we can do what fits the requirement for mlx5 now and
revisit in the future, if we do believe this should be general behavior
for all/most vendors of-course!
Jakub Kicinski May 20, 2021, 3:48 p.m. UTC | #10
On Wed, 19 May 2021 22:36:10 -0700 Saeed Mahameed wrote:
> On Wed, 2021-05-19 at 13:56 -0700, Jakub Kicinski wrote:
> > On Wed, 19 May 2021 13:18:36 -0700 Saeed Mahameed wrote:  
> > > then according to the above assumption it is safe to make
> > > netif_carrier_event() do everything.
> > > 
> > > netif_carrier_event(netdev, up) {
> > >         if (dev->reg_state == NETREG_UNINITIALIZED)
> > >                 return;
> > > 
> > >         if (up == netif_carrier_ok(netdev) {
> > >                 atomic_inc(&netdev->carrier_up_count);
> > >                 atomic_inc(&netdev->carrier_down_count);
> > >                 linkwatch_fire_event(netdev);
> > >         }
> > > 
> > >         if (up) {
> > >                 netdev_info(netdev, "Link up\n");
> > >                 netif_carrier_on(netdev);
> > >         } else {
> > >                 netdev_info(netdev, "Link down\n");
> > >                 netif_carrier_off(netdev);
> > >         }
> > > }  
> > 
> > Two things to consider are:
> >  - some drivers print more info than just "link up/link down" so
> > they'd
> >    have to drop that extra stuff (as much as I'd like the
> > consistency)  
> 
> +1 for the consistency
> 
> >  - again with the unnecessary events I was afraid that drivers reuse 
> >    the same handler for device events and to read the state in which
> >    case we may do something like:
> > 
> >         if (from_event && up == netif_carrier_ok(netdev)
> >   
> 
> I don't actually understand your point here .. what kind of scenarios
> it is wrong to use this function ? 
> 
> But anyway, the name of the function makes it very clear this is from
> event.. also we can document this.

I don't have any proof of this but drivers may check link state
periodically from a service job or such.

> > Maybe we can revisit when there's more users?  
> goes both ways :), we can do what fits the requirement for mlx5 now and
> revisit in the future, if we do believe this should be general behavior
> for all/most vendors of-course!

I think it'd be more of a "add this function so the future drivers can
use it". I've scanned the drivers I'm familiar with and none of them
seemed like they could make use of the "wider" version of the helper.
Does mlx4 need it?

The problem seems slightly unusual, I feel like targeted helper would
lead to a cleaner API, but can change if we really need to..
Saeed Mahameed May 20, 2021, 6:03 p.m. UTC | #11
On Thu, 2021-05-20 at 08:48 -0700, Jakub Kicinski wrote:
> On Wed, 19 May 2021 22:36:10 -0700 Saeed Mahameed wrote:
> > On Wed, 2021-05-19 at 13:56 -0700, Jakub Kicinski wrote:
> > > On Wed, 19 May 2021 13:18:36 -0700 Saeed Mahameed wrote:  
> > > > then according to the above assumption it is safe to make
> > > > netif_carrier_event() do everything.
> > > > 
> > > > netif_carrier_event(netdev, up) {
> > > >         if (dev->reg_state == NETREG_UNINITIALIZED)
> > > >                 return;
> > > > 
> > > >         if (up == netif_carrier_ok(netdev) {
> > > >                 atomic_inc(&netdev->carrier_up_count);
> > > >                 atomic_inc(&netdev->carrier_down_count);
> > > >                 linkwatch_fire_event(netdev);
> > > >         }
> > > > 
> > > >         if (up) {
> > > >                 netdev_info(netdev, "Link up\n");
> > > >                 netif_carrier_on(netdev);
> > > >         } else {
> > > >                 netdev_info(netdev, "Link down\n");
> > > >                 netif_carrier_off(netdev);
> > > >         }
> > > > }  
> > > 
> > > Two things to consider are:
> > >  - some drivers print more info than just "link up/link down" so
> > > they'd
> > >    have to drop that extra stuff (as much as I'd like the
> > > consistency)  
> > 
> > +1 for the consistency
> > 
> > >  - again with the unnecessary events I was afraid that drivers
> > > reuse 
> > >    the same handler for device events and to read the state in
> > > which
> > >    case we may do something like:
> > > 
> > >         if (from_event && up == netif_carrier_ok(netdev)
> > >   
> > 
> > I don't actually understand your point here .. what kind of
> > scenarios
> > it is wrong to use this function ? 
> > 
> > But anyway, the name of the function makes it very clear this is
> > from
> > event.. also we can document this.
> 
> I don't have any proof of this but drivers may check link state
> periodically from a service job or such.
> 

I see.

> > > Maybe we can revisit when there's more users?  
> > goes both ways :), we can do what fits the requirement for mlx5 now
> > and
> > revisit in the future, if we do believe this should be general
> > behavior
> > for all/most vendors of-course!
> 
> I think it'd be more of a "add this function so the future drivers
> can
> use it". I've scanned the drivers I'm familiar with and none of them
> seemed like they could make use of the "wider" version of the helper.
> Does mlx4 need it?
> 

No, mlx4 relies on the event type.

> The problem seems slightly unusual, I feel like targeted helper would
> lead to a cleaner API, but can change if we really need to..

Sure, I have no strong opinion on the matter.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bca832cdc4cb..5a67ebc0c96c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -91,12 +91,16 @@  void mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u8 port_state;
+	bool up;
 
 	port_state = mlx5_query_vport_state(mdev,
 					    MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
 					    0);
 
-	if (port_state == VPORT_STATE_UP) {
+	up = port_state == VPORT_STATE_UP;
+	if (up == netif_carrier_ok(priv->netdev))
+		netif_carrier_event(priv->netdev);
+	if (up) {
 		netdev_info(priv->netdev, "Link up\n");
 		netif_carrier_on(priv->netdev);
 	} else {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5cbc950b34df..be1dcceda5e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4187,8 +4187,8 @@  unsigned long dev_trans_start(struct net_device *dev);
 void __netdev_watchdog_up(struct net_device *dev);
 
 void netif_carrier_on(struct net_device *dev);
-
 void netif_carrier_off(struct net_device *dev);
+void netif_carrier_event(struct net_device *dev);
 
 /**
  *	netif_dormant_on - mark device as dormant.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 44991ea726fc..3090ae32307b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -515,6 +515,24 @@  void netif_carrier_off(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_carrier_off);
 
+/**
+ *	netif_carrier_event - report carrier state event
+ *	@dev: network device
+ *
+ * Device has detected a carrier event but the carrier state wasn't changed.
+ * Use in drivers when querying carrier state asynchronously, to avoid missing
+ * events (link flaps) if link recovers before it's queried.
+ */
+void netif_carrier_event(struct net_device *dev)
+{
+	if (dev->reg_state == NETREG_UNINITIALIZED)
+		return;
+	atomic_inc(&dev->carrier_up_count);
+	atomic_inc(&dev->carrier_down_count);
+	linkwatch_fire_event(dev);
+}
+EXPORT_SYMBOL_GPL(netif_carrier_event);
+
 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
    under all circumstances. It is difficult to invent anything faster or
    cheaper.