diff mbox series

[net,09/15] net/mlx5e: Forbid devlink reload if IPSec rules are offloaded

Message ID 20231122014804.27716-10-saeed@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net,01/15] net/mlx5e: Honor user choice of IPsec replay window size | expand

Checks

Context Check Description
netdev/series_format success Pull request is its own cover letter
netdev/codegen success Generated files up to date
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1115 this patch: 1115
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 1142 this patch: 1142
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1142 this patch: 1142
netdev/checkpatch warning WARNING: line length of 92 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Saeed Mahameed Nov. 22, 2023, 1:47 a.m. UTC
From: Jianbo Liu <jianbol@nvidia.com>

When devlink reload, mlx5 IPSec module can't be safely cleaned up if
there is any IPSec rule offloaded, so forbid it in this condition.

Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
 .../mellanox/mlx5/core/eswitch_offloads.c         | 15 +++++++++++++++
 3 files changed, 22 insertions(+)

Comments

Jiri Pirko Nov. 22, 2023, 9:13 a.m. UTC | #1
Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
>From: Jianbo Liu <jianbol@nvidia.com>
>
>When devlink reload, mlx5 IPSec module can't be safely cleaned up if
>there is any IPSec rule offloaded, so forbid it in this condition.
>
>Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
>Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
>Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
>---
> drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++++
> drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
> .../mellanox/mlx5/core/eswitch_offloads.c         | 15 +++++++++++++++
> 3 files changed, 22 insertions(+)
>
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>index 3e064234f6fe..8925e87a3ed5 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>@@ -157,6 +157,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
> 		return -EOPNOTSUPP;
> 	}
> 
>+	if (mlx5_eswitch_mode_is_blocked(dev)) {
>+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported if IPSec rules are configured");

That sounds a bit odd to me to be honest. Is pci device unbind forbidden
if ipsec rules are present too? This should be gracefully handled
instead of forbid.


>+		return -EOPNOTSUPP;
>+	}
>+
> 	if (mlx5_core_is_pf(dev) && pci_num_vf(pdev))
> 		NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
> 
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>index b674b57d05aa..88524c2a4355 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>@@ -846,6 +846,7 @@ void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
> 
> int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev);
> void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev);
>+bool mlx5_eswitch_mode_is_blocked(struct mlx5_core_dev *dev);
> 
> static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
> {
>@@ -947,6 +948,7 @@ static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
> 
> static inline int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { return 0; }
> static inline void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) {}
>+static inline bool mlx5_eswitch_mode_is_blocked(struct mlx5_core_dev *dev) { return false; }
> static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev)
> {
> 	return false;
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>index bf78eeca401b..85c2a20e68fa 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>@@ -3693,6 +3693,21 @@ void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev)
> 	up_write(&esw->mode_lock);
> }
> 
>+bool mlx5_eswitch_mode_is_blocked(struct mlx5_core_dev *dev)
>+{
>+	struct mlx5_eswitch *esw = dev->priv.eswitch;
>+	bool blocked;
>+
>+	if (!mlx5_esw_allowed(esw))
>+		return false;
>+
>+	down_write(&esw->mode_lock);
>+	blocked = esw->offloads.num_block_mode;
>+	up_write(&esw->mode_lock);
>+
>+	return blocked;
>+}
>+
> int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
> 				  struct netlink_ext_ack *extack)
> {
>-- 
>2.42.0
>
>
Leon Romanovsky Nov. 22, 2023, 9:35 a.m. UTC | #2
On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
> >From: Jianbo Liu <jianbol@nvidia.com>
> >
> >When devlink reload, mlx5 IPSec module can't be safely cleaned up if
> >there is any IPSec rule offloaded, so forbid it in this condition.
> >
> >Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
> >Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> >Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> >Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> >---
> > drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++++
> > drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
> > .../mellanox/mlx5/core/eswitch_offloads.c         | 15 +++++++++++++++
> > 3 files changed, 22 insertions(+)
> >
> >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
> >index 3e064234f6fe..8925e87a3ed5 100644
> >--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
> >+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
> >@@ -157,6 +157,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
> > 		return -EOPNOTSUPP;
> > 	}
> > 
> >+	if (mlx5_eswitch_mode_is_blocked(dev)) {
> >+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported if IPSec rules are configured");
> 
> That sounds a bit odd to me to be honest. Is pci device unbind forbidden
> if ipsec rules are present too? This should be gracefully handled
> instead of forbid.

unbind is handled differently because that operation will call to
unregister netdevice event which will clean everything.

devlink reload behaves differently from unbind.

Thanks
Jiri Pirko Nov. 22, 2023, 9:50 a.m. UTC | #3
Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
>On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
>> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
>> >From: Jianbo Liu <jianbol@nvidia.com>
>> >
>> >When devlink reload, mlx5 IPSec module can't be safely cleaned up if
>> >there is any IPSec rule offloaded, so forbid it in this condition.
>> >
>> >Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
>> >Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>> >Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
>> >Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
>> >---
>> > drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++++
>> > drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
>> > .../mellanox/mlx5/core/eswitch_offloads.c         | 15 +++++++++++++++
>> > 3 files changed, 22 insertions(+)
>> >
>> >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>> >index 3e064234f6fe..8925e87a3ed5 100644
>> >--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>> >+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>> >@@ -157,6 +157,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
>> > 		return -EOPNOTSUPP;
>> > 	}
>> > 
>> >+	if (mlx5_eswitch_mode_is_blocked(dev)) {
>> >+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported if IPSec rules are configured");
>> 
>> That sounds a bit odd to me to be honest. Is pci device unbind forbidden
>> if ipsec rules are present too? This should be gracefully handled
>> instead of forbid.
>
>unbind is handled differently because that operation will call to
>unregister netdevice event which will clean everything.

But in reload, the netdevice is also unregistered. Same flow, isn't it?

>
>devlink reload behaves differently from unbind.

I don't see why. Forget about the driver implementation for now. From
the perspective of the user, what's the difference between these flows:
1) unbind->netdevremoval
2) reload->netdevremoval

Both should be working and do necessary cleanups.


>
>Thanks
Leon Romanovsky Nov. 22, 2023, 11:28 a.m. UTC | #4
On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
> >> >From: Jianbo Liu <jianbol@nvidia.com>
> >> >
> >> >When devlink reload, mlx5 IPSec module can't be safely cleaned up if
> >> >there is any IPSec rule offloaded, so forbid it in this condition.
> >> >
> >> >Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
> >> >Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> >> >Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> >> >Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> >> >---
> >> > drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++++
> >> > drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
> >> > .../mellanox/mlx5/core/eswitch_offloads.c         | 15 +++++++++++++++
> >> > 3 files changed, 22 insertions(+)
> >> >
> >> >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
> >> >index 3e064234f6fe..8925e87a3ed5 100644
> >> >--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
> >> >+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
> >> >@@ -157,6 +157,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
> >> > 		return -EOPNOTSUPP;
> >> > 	}
> >> > 
> >> >+	if (mlx5_eswitch_mode_is_blocked(dev)) {
> >> >+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported if IPSec rules are configured");
> >> 
> >> That sounds a bit odd to me to be honest. Is pci device unbind forbidden
> >> if ipsec rules are present too? This should be gracefully handled
> >> instead of forbid.
> >
> >unbind is handled differently because that operation will call to
> >unregister netdevice event which will clean everything.
> 
> But in reload, the netdevice is also unregistered. Same flow, isn't it?

Unfortunately not, we (mlx5) were forced by employer of one of
the netdev maintainers to keep uplink netdev in devlink reload
while we are in eswitch. It is skipped in lines 1556-1558:

  1548 static void
  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
  1550 {
  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
  1552         struct net_device *netdev = rpriv->netdev;
  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
  1554         void *ppriv = priv->ppriv;
  1555
  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
  1558                 goto free_ppriv;
  1559         }
  1560
  1561         unregister_netdev(netdev);
  1562         mlx5e_rep_vnic_reporter_destroy(priv);
  1563         mlx5e_detach_netdev(priv);
  1564         priv->profile->cleanup(priv);
  1565         mlx5e_destroy_netdev(priv);
  1566 free_ppriv:
  1567         kvfree(ppriv); /* mlx5e_rep_priv */
  1568 }

> 
> >
> >devlink reload behaves differently from unbind.
> 
> I don't see why. Forget about the driver implementation for now. From
> the perspective of the user, what's the difference between these flows:
> 1) unbind->netdevremoval

netdevice can be removed and there is no way to inform users about errors.

> 2) reload->netdevremoval

According to that employer, netdevice should stay.

> 
> Both should be working and do necessary cleanups.

I would be more than happy to see same flow, but this is above my
pay grade and I have little desire to be in the middle between
that netdev maintainer and his management.

Feel free to approach me offline, and I will give you the names.

Thanks

> 
> 
> >
> >Thanks
Jiri Pirko Nov. 22, 2023, 12:25 p.m. UTC | #5
Wed, Nov 22, 2023 at 12:28:32PM CET, leon@kernel.org wrote:
>On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
>> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
>> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
>> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
>> >> >From: Jianbo Liu <jianbol@nvidia.com>
>> >> >
>> >> >When devlink reload, mlx5 IPSec module can't be safely cleaned up if
>> >> >there is any IPSec rule offloaded, so forbid it in this condition.
>> >> >
>> >> >Fixes: edd8b295f9e2 ("Merge branch 'mlx5-ipsec-packet-offload-support-in-eswitch-mode'")
>> >> >Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>> >> >Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
>> >> >Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
>> >> >---
>> >> > drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  5 +++++
>> >> > drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 ++
>> >> > .../mellanox/mlx5/core/eswitch_offloads.c         | 15 +++++++++++++++
>> >> > 3 files changed, 22 insertions(+)
>> >> >
>> >> >diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>> >> >index 3e064234f6fe..8925e87a3ed5 100644
>> >> >--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>> >> >+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
>> >> >@@ -157,6 +157,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
>> >> > 		return -EOPNOTSUPP;
>> >> > 	}
>> >> > 
>> >> >+	if (mlx5_eswitch_mode_is_blocked(dev)) {
>> >> >+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported if IPSec rules are configured");
>> >> 
>> >> That sounds a bit odd to me to be honest. Is pci device unbind forbidden
>> >> if ipsec rules are present too? This should be gracefully handled
>> >> instead of forbid.
>> >
>> >unbind is handled differently because that operation will call to
>> >unregister netdevice event which will clean everything.
>> 
>> But in reload, the netdevice is also unregistered. Same flow, isn't it?
>
>Unfortunately not, we (mlx5) were forced by employer of one of
>the netdev maintainers to keep uplink netdev in devlink reload
>while we are in eswitch. It is skipped in lines 1556-1558:

That is clearly a bug that should be fixed. That will solve the issue.


>
>  1548 static void
>  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
>  1550 {
>  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
>  1552         struct net_device *netdev = rpriv->netdev;
>  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
>  1554         void *ppriv = priv->ppriv;
>  1555
>  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
>  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
>  1558                 goto free_ppriv;
>  1559         }
>  1560
>  1561         unregister_netdev(netdev);
>  1562         mlx5e_rep_vnic_reporter_destroy(priv);
>  1563         mlx5e_detach_netdev(priv);
>  1564         priv->profile->cleanup(priv);
>  1565         mlx5e_destroy_netdev(priv);
>  1566 free_ppriv:
>  1567         kvfree(ppriv); /* mlx5e_rep_priv */
>  1568 }
>
>> 
>> >
>> >devlink reload behaves differently from unbind.
>> 
>> I don't see why. Forget about the driver implementation for now. From
>> the perspective of the user, what's the difference between these flows:
>> 1) unbind->netdevremoval
>
>netdevice can be removed and there is no way to inform users about errors.
>
>> 2) reload->netdevremoval
>
>According to that employer, netdevice should stay.
>
>> 
>> Both should be working and do necessary cleanups.
>
>I would be more than happy to see same flow, but this is above my
>pay grade and I have little desire to be in the middle between
>that netdev maintainer and his management.
>
>Feel free to approach me offline, and I will give you the names.
>
>Thanks
>
>> 
>> 
>> >
>> >Thanks
Jakub Kicinski Nov. 23, 2023, 3:53 a.m. UTC | #6
On Wed, 22 Nov 2023 13:28:32 +0200 Leon Romanovsky wrote:
> Unfortunately not, we (mlx5) were forced by employer of one of
> the netdev maintainers to keep uplink netdev in devlink reload
> while we are in eswitch.

The way you phrased this makes it sound like employers of netdev
maintainers get to exert power over this community.

This is an unacceptable insinuation.

DEVLINK_RELOAD_LIMIT_NO_RESET should not cause link loss, sure.
Even if Meta required that you implemented that (which it does
not, AFAIK) - it's just an upstream API.
Saeed Mahameed Nov. 23, 2023, 6:12 a.m. UTC | #7
On 22 Nov 19:53, Jakub Kicinski wrote:
>On Wed, 22 Nov 2023 13:28:32 +0200 Leon Romanovsky wrote:
>> Unfortunately not, we (mlx5) were forced by employer of one of
>> the netdev maintainers to keep uplink netdev in devlink reload
>> while we are in eswitch.
>
>The way you phrased this makes it sound like employers of netdev
>maintainers get to exert power over this community.
>

I think Leon is just misinformed, the mlx5 netdev behavior Leon is
talking about was already removed and has nothing to do with eswitch,
and even that was never required by any employer or maintainer,
sorry for the confusion .. 

>This is an unacceptable insinuation.
>
>DEVLINK_RELOAD_LIMIT_NO_RESET should not cause link loss, sure.
>Even if Meta required that you implemented that (which it does
>not, AFAIK) - it's just an upstream API.
>

We only support this limit for FW_ACTIVATE_ACTION, and has no issue
in this flow.

Leon's issue is with internal mlx5 uplink implementation where on eswitch
mode changes we don't unregister the netdev which causes eswitch resource
leaks with ipsec rules, since we move eswitch to legacy mode on devlink
reload then the same issue happens on relaod, hence he needs to block it
in this patch, and we are already discussing a new design to fix devlink
reload in net-next.

This is Just a bug and has nothing to do with any requirements from anyone.

Thanks.
Leon Romanovsky Nov. 23, 2023, 6:33 p.m. UTC | #8
On Wed, Nov 22, 2023 at 07:53:32PM -0800, Jakub Kicinski wrote:
> On Wed, 22 Nov 2023 13:28:32 +0200 Leon Romanovsky wrote:
> > Unfortunately not, we (mlx5) were forced by employer of one of
> > the netdev maintainers to keep uplink netdev in devlink reload
> > while we are in eswitch.
> 
> The way you phrased this makes it sound like employers of netdev
> maintainers get to exert power over this community.
> 
> This is an unacceptable insinuation.

It will be much beneficial if you stop to seek extra level of meanings
in our conversations. There are differences in our ability to express
and feel intent in English language.

> 
> DEVLINK_RELOAD_LIMIT_NO_RESET should not cause link loss, sure.
> Even if Meta required that you implemented that (which it does
> not, AFAIK) - it's just an upstream API.

Excellent.
Jiri Pirko Nov. 28, 2023, 12:02 p.m. UTC | #9
Wed, Nov 22, 2023 at 12:28:32PM CET, leon@kernel.org wrote:
>On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
>> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
>> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
>> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
>> >> >From: Jianbo Liu <jianbol@nvidia.com>

[...]


>while we are in eswitch. It is skipped in lines 1556-1558:
>
>  1548 static void
>  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
>  1550 {
>  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
>  1552         struct net_device *netdev = rpriv->netdev;
>  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
>  1554         void *ppriv = priv->ppriv;
>  1555
>  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
>  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
>  1558                 goto free_ppriv;
>  1559         }

Uplink netdev is created and destroyed by a different code:
mlx5e_probe()
mlx5e_remove()

According to my testing. The uplink netdev is properly removed and
re-added during reload-reinit. What am I missing?



>  1560
>  1561         unregister_netdev(netdev);
>  1562         mlx5e_rep_vnic_reporter_destroy(priv);
>  1563         mlx5e_detach_netdev(priv);
>  1564         priv->profile->cleanup(priv);
>  1565         mlx5e_destroy_netdev(priv);
>  1566 free_ppriv:
>  1567         kvfree(ppriv); /* mlx5e_rep_priv */
>  1568 }
>

[...]
Leon Romanovsky Nov. 28, 2023, 4:08 p.m. UTC | #10
On Tue, Nov 28, 2023 at 01:02:46PM +0100, Jiri Pirko wrote:
> Wed, Nov 22, 2023 at 12:28:32PM CET, leon@kernel.org wrote:
> >On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
> >> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
> >> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
> >> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
> >> >> >From: Jianbo Liu <jianbol@nvidia.com>
> 
> [...]
> 
> 
> >while we are in eswitch. It is skipped in lines 1556-1558:
> >
> >  1548 static void
> >  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
> >  1550 {
> >  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
> >  1552         struct net_device *netdev = rpriv->netdev;
> >  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
> >  1554         void *ppriv = priv->ppriv;
> >  1555
> >  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
> >  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
> >  1558                 goto free_ppriv;
> >  1559         }
> 
> Uplink netdev is created and destroyed by a different code:
> mlx5e_probe()
> mlx5e_remove()
> 
> According to my testing. The uplink netdev is properly removed and
> re-added during reload-reinit. What am I missing?

You are missing internal profile switch from eswitch to legacy,
when you perform driver unload.

Feel free to contact me or Jianbo offline if you need more mlx5 specific
details.

Thanks

> 
> 
> 
> >  1560
> >  1561         unregister_netdev(netdev);
> >  1562         mlx5e_rep_vnic_reporter_destroy(priv);
> >  1563         mlx5e_detach_netdev(priv);
> >  1564         priv->profile->cleanup(priv);
> >  1565         mlx5e_destroy_netdev(priv);
> >  1566 free_ppriv:
> >  1567         kvfree(ppriv); /* mlx5e_rep_priv */
> >  1568 }
> >
> 
> [...]
> 
>
Jiri Pirko Nov. 29, 2023, 2:51 p.m. UTC | #11
Tue, Nov 28, 2023 at 05:08:49PM CET, leon@kernel.org wrote:
>On Tue, Nov 28, 2023 at 01:02:46PM +0100, Jiri Pirko wrote:
>> Wed, Nov 22, 2023 at 12:28:32PM CET, leon@kernel.org wrote:
>> >On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
>> >> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
>> >> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
>> >> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
>> >> >> >From: Jianbo Liu <jianbol@nvidia.com>
>> 
>> [...]
>> 
>> 
>> >while we are in eswitch. It is skipped in lines 1556-1558:
>> >
>> >  1548 static void
>> >  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
>> >  1550 {
>> >  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
>> >  1552         struct net_device *netdev = rpriv->netdev;
>> >  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
>> >  1554         void *ppriv = priv->ppriv;
>> >  1555
>> >  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
>> >  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
>> >  1558                 goto free_ppriv;
>> >  1559         }
>> 
>> Uplink netdev is created and destroyed by a different code:
>> mlx5e_probe()
>> mlx5e_remove()
>> 
>> According to my testing. The uplink netdev is properly removed and
>> re-added during reload-reinit. What am I missing?
>
>You are missing internal profile switch from eswitch to legacy,
>when you perform driver unload.
>
>Feel free to contact me or Jianbo offline if you need more mlx5 specific
>details.

Got it. But that switch can happend independently of devlink reload
reinit. Also, I think it cause more issues than just abandoned
ipsec rules.


>
>Thanks
>
>> 
>> 
>> 
>> >  1560
>> >  1561         unregister_netdev(netdev);
>> >  1562         mlx5e_rep_vnic_reporter_destroy(priv);
>> >  1563         mlx5e_detach_netdev(priv);
>> >  1564         priv->profile->cleanup(priv);
>> >  1565         mlx5e_destroy_netdev(priv);
>> >  1566 free_ppriv:
>> >  1567         kvfree(ppriv); /* mlx5e_rep_priv */
>> >  1568 }
>> >
>> 
>> [...]
>> 
>>
Leon Romanovsky Dec. 4, 2023, 5:05 p.m. UTC | #12
On Wed, Nov 29, 2023 at 03:51:14PM +0100, Jiri Pirko wrote:
> Tue, Nov 28, 2023 at 05:08:49PM CET, leon@kernel.org wrote:
> >On Tue, Nov 28, 2023 at 01:02:46PM +0100, Jiri Pirko wrote:
> >> Wed, Nov 22, 2023 at 12:28:32PM CET, leon@kernel.org wrote:
> >> >On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
> >> >> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
> >> >> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
> >> >> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
> >> >> >> >From: Jianbo Liu <jianbol@nvidia.com>
> >> 
> >> [...]
> >> 
> >> 
> >> >while we are in eswitch. It is skipped in lines 1556-1558:
> >> >
> >> >  1548 static void
> >> >  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
> >> >  1550 {
> >> >  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
> >> >  1552         struct net_device *netdev = rpriv->netdev;
> >> >  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
> >> >  1554         void *ppriv = priv->ppriv;
> >> >  1555
> >> >  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
> >> >  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
> >> >  1558                 goto free_ppriv;
> >> >  1559         }
> >> 
> >> Uplink netdev is created and destroyed by a different code:
> >> mlx5e_probe()
> >> mlx5e_remove()
> >> 
> >> According to my testing. The uplink netdev is properly removed and
> >> re-added during reload-reinit. What am I missing?
> >
> >You are missing internal profile switch from eswitch to legacy,
> >when you perform driver unload.
> >
> >Feel free to contact me or Jianbo offline if you need more mlx5 specific
> >details.
> 
> Got it. But that switch can happend independently of devlink reload
> reinit. 

Right, devlink reload was relatively easy "to close" and users would see
the reason for it.


> Also, I think it cause more issues than just abandoned ipsec rules.

Yes, it is.

> 
> 
> >
> >Thanks
> >
> >> 
> >> 
> >> 
> >> >  1560
> >> >  1561         unregister_netdev(netdev);
> >> >  1562         mlx5e_rep_vnic_reporter_destroy(priv);
> >> >  1563         mlx5e_detach_netdev(priv);
> >> >  1564         priv->profile->cleanup(priv);
> >> >  1565         mlx5e_destroy_netdev(priv);
> >> >  1566 free_ppriv:
> >> >  1567         kvfree(ppriv); /* mlx5e_rep_priv */
> >> >  1568 }
> >> >
> >> 
> >> [...]
> >> 
> >>
Jiri Pirko Dec. 5, 2023, 9:30 a.m. UTC | #13
Mon, Dec 04, 2023 at 06:05:38PM CET, leon@kernel.org wrote:
>On Wed, Nov 29, 2023 at 03:51:14PM +0100, Jiri Pirko wrote:
>> Tue, Nov 28, 2023 at 05:08:49PM CET, leon@kernel.org wrote:
>> >On Tue, Nov 28, 2023 at 01:02:46PM +0100, Jiri Pirko wrote:
>> >> Wed, Nov 22, 2023 at 12:28:32PM CET, leon@kernel.org wrote:
>> >> >On Wed, Nov 22, 2023 at 10:50:37AM +0100, Jiri Pirko wrote:
>> >> >> Wed, Nov 22, 2023 at 10:35:46AM CET, leon@kernel.org wrote:
>> >> >> >On Wed, Nov 22, 2023 at 10:13:45AM +0100, Jiri Pirko wrote:
>> >> >> >> Wed, Nov 22, 2023 at 02:47:58AM CET, saeed@kernel.org wrote:
>> >> >> >> >From: Jianbo Liu <jianbol@nvidia.com>
>> >> 
>> >> [...]
>> >> 
>> >> 
>> >> >while we are in eswitch. It is skipped in lines 1556-1558:
>> >> >
>> >> >  1548 static void
>> >> >  1549 mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
>> >> >  1550 {
>> >> >  1551         struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
>> >> >  1552         struct net_device *netdev = rpriv->netdev;
>> >> >  1553         struct mlx5e_priv *priv = netdev_priv(netdev);
>> >> >  1554         void *ppriv = priv->ppriv;
>> >> >  1555
>> >> >  1556         if (rep->vport == MLX5_VPORT_UPLINK) {
>> >> >  1557                 mlx5e_vport_uplink_rep_unload(rpriv);
>> >> >  1558                 goto free_ppriv;
>> >> >  1559         }
>> >> 
>> >> Uplink netdev is created and destroyed by a different code:
>> >> mlx5e_probe()
>> >> mlx5e_remove()
>> >> 
>> >> According to my testing. The uplink netdev is properly removed and
>> >> re-added during reload-reinit. What am I missing?
>> >
>> >You are missing internal profile switch from eswitch to legacy,
>> >when you perform driver unload.
>> >
>> >Feel free to contact me or Jianbo offline if you need more mlx5 specific
>> >details.
>> 
>> Got it. But that switch can happend independently of devlink reload
>> reinit. 
>
>Right, devlink reload was relatively easy "to close" and users would see
>the reason for it.

It's a wrong fix. We should fix this properly.

>
>
>> Also, I think it cause more issues than just abandoned ipsec rules.
>
>Yes, it is.
>
>> 
>> 
>> >
>> >Thanks
>> >
>> >> 
>> >> 
>> >> 
>> >> >  1560
>> >> >  1561         unregister_netdev(netdev);
>> >> >  1562         mlx5e_rep_vnic_reporter_destroy(priv);
>> >> >  1563         mlx5e_detach_netdev(priv);
>> >> >  1564         priv->profile->cleanup(priv);
>> >> >  1565         mlx5e_destroy_netdev(priv);
>> >> >  1566 free_ppriv:
>> >> >  1567         kvfree(ppriv); /* mlx5e_rep_priv */
>> >> >  1568 }
>> >> >
>> >> 
>> >> [...]
>> >> 
>> >>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 3e064234f6fe..8925e87a3ed5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -157,6 +157,11 @@  static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 		return -EOPNOTSUPP;
 	}
 
+	if (mlx5_eswitch_mode_is_blocked(dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported if IPSec rules are configured");
+		return -EOPNOTSUPP;
+	}
+
 	if (mlx5_core_is_pf(dev) && pci_num_vf(pdev))
 		NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable");
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b674b57d05aa..88524c2a4355 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -846,6 +846,7 @@  void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
 
 int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev);
 void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev);
+bool mlx5_eswitch_mode_is_blocked(struct mlx5_core_dev *dev);
 
 static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw)
 {
@@ -947,6 +948,7 @@  static inline void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev)
 
 static inline int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) {}
+static inline bool mlx5_eswitch_mode_is_blocked(struct mlx5_core_dev *dev) { return false; }
 static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev)
 {
 	return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bf78eeca401b..85c2a20e68fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3693,6 +3693,21 @@  void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev)
 	up_write(&esw->mode_lock);
 }
 
+bool mlx5_eswitch_mode_is_blocked(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	bool blocked;
+
+	if (!mlx5_esw_allowed(esw))
+		return false;
+
+	down_write(&esw->mode_lock);
+	blocked = esw->offloads.num_block_mode;
+	up_write(&esw->mode_lock);
+
+	return blocked;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 				  struct netlink_ext_ack *extack)
 {