diff mbox series

[rdma-next,2/2] RDMA/mlx5: Send correct port events

Message ID 86a8473d0ccea1b66e59eb86457359be9005cfcb.1692168533.git.leon@kernel.org (mailing list archive)
State Changes Requested
Headers show
Series mlx5 RDMA LAG fixes | expand

Commit Message

Leon Romanovsky Aug. 16, 2023, 6:52 a.m. UTC
From: Mark Bloch <mbloch@nvidia.com>

When operating in switchdev mode and with an active LAG, the function
mlx5_lag_get_roce_netdev() fails to return a valid net device as this
function is designed specifically for RoCE LAGs.

As a result, the driver sends incorrect port event reports. To address
this, introduce a new API, mlx5_lag_get_netdev(), which returns the net
device of the given mlx5 core device whenever a LAG is active. While at
it, move the event filtering condition of mlx5_netdev_event() into a
dedicated helper, mlx5_netdev_send_event().

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/main.c             | 39 +++++++++++++++----
 .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 29 ++++++++++++++
 include/linux/mlx5/driver.h                   |  2 +
 3 files changed, 62 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 215d7b0add8f..8b98200bd94c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -159,6 +159,29 @@  static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
 	return NULL;
 }
 
+static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
+				   struct net_device *ndev,
+				   struct net_device *upper,
+				   struct mlx5_roce *roce)
+{
+	if (!dev->ib_active)
+		return false;
+
+	/* Event is about our upper device */
+	if (upper == ndev)
+		return true;
+
+	/* RDMA device not in lag and not in switchdev */
+	if (!dev->is_rep && !upper && ndev == roce->netdev)
+		return true;
+
+	/* RDMA device in switchdev */
+	if (dev->is_rep && ndev == roce->netdev)
+		return true;
+
+	return false;
+}
+
 static int mlx5_netdev_event(struct notifier_block *this,
 			     unsigned long event, void *ptr)
 {
@@ -200,7 +223,7 @@  static int mlx5_netdev_event(struct notifier_block *this,
 		if (ibdev->lag_active) {
 			struct net_device *lag_ndev;
 
-			lag_ndev = mlx5_lag_get_roce_netdev(mdev);
+			lag_ndev = mlx5_lag_get_netdev(mdev);
 			if (lag_ndev) {
 				upper = netdev_master_upper_dev_get(lag_ndev);
 				dev_put(lag_ndev);
@@ -209,13 +232,13 @@  static int mlx5_netdev_event(struct notifier_block *this,
 			}
 		}
 
-		if (ibdev->is_rep)
+		if (ibdev->is_rep) {
 			roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
-		if (!roce)
-			return NOTIFY_DONE;
-		if ((upper == ndev ||
-		     ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
-		    ibdev->ib_active) {
+			if (!roce)
+				return NOTIFY_DONE;
+		}
+
+		if (mlx5_netdev_send_event(ibdev, ndev, upper, roce)) {
 			struct ib_event ibev = { };
 			enum ib_port_state port_state;
 
@@ -260,7 +283,7 @@  static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
 	if (!mdev)
 		return NULL;
 
-	if (ibdev->lag_active) {
+	if (!ibdev->is_rep && ibdev->lag_active) {
 		ndev = mlx5_lag_get_roce_netdev(mdev);
 		if (ndev)
 			goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index f0a074b2fcdf..83298e9addd3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1498,6 +1498,35 @@  struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
 
+struct net_device *mlx5_lag_get_netdev(struct mlx5_core_dev *dev)
+{
+	struct net_device *ndev = NULL;
+	struct mlx5_lag *ldev;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&lag_lock, flags);
+	ldev = mlx5_lag_dev(dev);
+
+	if (!(ldev && __mlx5_lag_is_active(ldev)))
+		goto unlock;
+
+	for (i = 0; i < ldev->ports; i++) {
+		if (ldev->pf[i].dev == dev) {
+			ndev = ldev->pf[i].netdev;
+			break;
+		}
+	}
+
+	if (ndev)
+		dev_hold(ndev);
+
+unlock:
+	spin_unlock_irqrestore(&lag_lock, flags);
+	return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_netdev);
+
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 			   struct net_device *slave)
 {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 25d0528f9219..bc7e3a974f62 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1160,6 +1160,8 @@  bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+
+struct net_device *mlx5_lag_get_netdev(struct mlx5_core_dev *dev);
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
 			   struct net_device *slave);
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,