diff mbox series

[net-next,5/6] mlxsw: Add VXLAN bridge ports to same hardware domain as physical bridge ports

Message ID 7279056843140fae3a72c2d204c7886b79d03899.1742224300.git.petrm@nvidia.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series mlxsw: Add VXLAN to the same hardware domain as physical bridge ports | expand

Commit Message

Petr Machata March 17, 2025, 5:37 p.m. UTC
From: Amit Cohen <amcohen@nvidia.com>

When hardware floods packets to bridge ports, but flooding to VXLAN bridge
port fails during encapsulation to one of the remote VTEPs, the packets are
trapped to CPU. In such case, the packets are marked with
skb->offload_fwd_mark, which means that packet was L2-forwarded in
hardware. Software data path repeats flooding, but packets which are
marked with skb->offload_fwd_mark will not be flooded by the bridge to
bridge ports which are in the same hardware domain as the ingress port.

Currently, mlxsw does not add VXLAN bridge ports to the same hardware
domain as physical bridge ports despite the fact that the device is able
to forward packets to and from VXLAN tunnels in hardware. In some scenarios
(as mentioned above) this can result in remote VTEPs receiving duplicate
packets. The packets are first flooded by hardware and after an
encapsulation failure, they are flooded again to all remote VTEPs by
software.

Solve this by adding VXLAN bridge ports to the same hardware domain as
physical bridge ports, so then nbp_switchdev_allowed_egress() will return
false also for VXLAN, and packets will not be sent twice from VXLAN device.

switchdev_bridge_port_offload() should get vxlan_dev not as const, so
some changes are required. Call switchdev API from
mlxsw_sp_bridge_vxlan_{join,leave}() which handle offload configurations.

Reported-by: Vladimir Oltean <olteanv@gmail.com>
Closes: https://lore.kernel.org/all/20250210152246.4ajumdchwhvbarik@skbuf/
Reported-by: Vladyslav Mykhaliuk <vmykhaliuk@nvidia.com>
Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |  4 +--
 .../mellanox/mlxsw/spectrum_switchdev.c       | 28 ++++++++++++++++---
 2 files changed, 26 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index fa7082ee5183..37cd1d002b3b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -661,10 +661,10 @@  bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
 					 const struct net_device *br_dev);
 int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
 			       const struct net_device *br_dev,
-			       const struct net_device *vxlan_dev, u16 vid,
+			       struct net_device *vxlan_dev, u16 vid,
 			       struct netlink_ext_ack *extack);
 void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
-				 const struct net_device *vxlan_dev);
+				 struct net_device *vxlan_dev);
 extern struct notifier_block mlxsw_sp_switchdev_notifier;
 
 /* spectrum.c */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 13ad4e31d701..a48bf342084d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2950,22 +2950,42 @@  static void __mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
 
 int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
 			       const struct net_device *br_dev,
-			       const struct net_device *vxlan_dev, u16 vid,
+			       struct net_device *vxlan_dev, u16 vid,
 			       struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_bridge_device *bridge_device;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	int err;
 
 	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
 	if (WARN_ON(!bridge_device))
 		return -EINVAL;
 
-	return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
-					      extack);
+	mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(bridge_device->dev);
+	if (!mlxsw_sp_port)
+		return -EINVAL;
+
+	err = bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
+					     extack);
+	if (err)
+		return err;
+
+	err = switchdev_bridge_port_offload(vxlan_dev, mlxsw_sp_port->dev,
+					    NULL, NULL, NULL, false, extack);
+	if (err)
+		goto err_bridge_port_offload;
+
+	return 0;
+
+err_bridge_port_offload:
+	__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+	return err;
 }
 
 void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
-				 const struct net_device *vxlan_dev)
+				 struct net_device *vxlan_dev)
 {
+	switchdev_bridge_port_unoffload(vxlan_dev, NULL, NULL, NULL);
 	__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
 }