@@ -1458,8 +1458,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
esw->mode = mode;
- mlx5_lag_update(esw->dev);
-
if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
@@ -1506,6 +1504,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
if (!mlx5_esw_allowed(esw))
return 0;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
if (esw->mode == MLX5_ESWITCH_NONE) {
ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1518,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
esw->esw_funcs.num_vfs = num_vfs;
}
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
return ret;
}
@@ -1550,8 +1550,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
old_mode = esw->mode;
esw->mode = MLX5_ESWITCH_NONE;
- mlx5_lag_update(esw->dev);
-
if (old_mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
@@ -1567,10 +1565,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
if (!mlx5_esw_allowed(esw))
return;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
mlx5_eswitch_disable_locked(esw, clear_vf);
esw->esw_funcs.num_vfs = 0;
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
}
static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1759,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
+ lockdep_register_key(&esw->mode_lock_key);
init_rwsem(&esw->mode_lock);
+ lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
esw->enabled_vports = 0;
esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1795,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
+ lockdep_unregister_key(&esw->mode_lock_key);
mutex_destroy(&esw->state_lock);
WARN_ON(!xa_empty(&esw->offloads.vhca_map));
xa_destroy(&esw->offloads.vhca_map);
@@ -2366,9 +2369,22 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
*/
void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
+ if (!mlx5_esw_allowed(esw))
+ return;
up_write(&esw->mode_lock);
}
+/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
+ down_write(&esw->mode_lock);
+}
+
/**
* mlx5_eswitch_get_total_vports - Get total vports of the eswitch
*
@@ -323,6 +323,7 @@ struct mlx5_eswitch {
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
+ struct lock_class_key mode_lock_key;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -707,6 +708,7 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
@@ -727,6 +729,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
return ERR_PTR(-EOPNOTSUPP);
}
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
@@ -3051,10 +3051,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
- return err;
+ goto enable_lag;
}
cur_mlx5_mode = err;
err = 0;
@@ -3071,6 +3072,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
unlock:
mlx5_esw_unlock(esw);
+enable_lag:
+ mlx5_lag_enable_change(esw->dev);
return err;
}
@@ -418,21 +418,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ if (dev0)
+ mlx5_esw_lock(dev0->priv.eswitch);
+ if (dev1)
+ mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ if (dev1)
+ mlx5_esw_unlock(dev1->priv.eswitch);
+ if (dev0)
+ mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
static void mlx5_do_bond_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
bond_work);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int status;
status = mlx5_dev_list_trylock();
if (!status) {
- /* 1 sec delay. */
mlx5_queue_bond_work(ldev, HZ);
return;
}
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_lag_lock_eswitches(dev0, dev1);
mlx5_do_bond(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
mlx5_dev_list_unlock();
}
@@ -630,7 +657,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
}
/* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
@@ -638,7 +665,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
- return;
+ return 0;
tmp_dev = mlx5_get_next_phys_dev(dev);
if (tmp_dev)
@@ -648,15 +675,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
ldev = mlx5_lag_dev_alloc(dev);
if (!ldev) {
mlx5_core_err(dev, "Failed to alloc lag dev\n");
- return;
+ return 0;
}
} else {
+ if (ldev->mode_changes_in_progress)
+ return -EAGAIN;
mlx5_ldev_get(ldev);
}
mlx5_ldev_add_mdev(ldev, dev);
- return;
+ return 0;
}
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -667,7 +696,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;
+recheck:
mlx5_dev_list_lock();
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_ldev_remove_mdev(ldev, dev);
mlx5_dev_list_unlock();
mlx5_ldev_put(ldev);
@@ -675,8 +710,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
+ int err;
+
+recheck:
mlx5_dev_list_lock();
- __mlx5_lag_dev_add_mdev(dev);
+ err = __mlx5_lag_dev_add_mdev(dev);
+ if (err) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_dev_list_unlock();
}
@@ -716,6 +759,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
if (i >= MLX5_MAX_PORTS)
ldev->flags |= MLX5_LAG_FLAG_READY;
+ mlx5_queue_bond_work(ldev, 0);
}
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -789,19 +833,36 @@ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
+ struct mlx5_core_dev *dev0;
+ struct mlx5_core_dev *dev1;
struct mlx5_lag *ldev;
mlx5_dev_list_lock();
+
ldev = mlx5_lag_dev(dev);
- if (!ldev)
- goto unlock;
+ dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ dev1 = ldev->pf[MLX5_LAG_P2].dev;
- mlx5_do_bond(ldev);
+ ldev->mode_changes_in_progress++;
+ if (__mlx5_lag_is_active(ldev)) {
+ mlx5_lag_lock_eswitches(dev0, dev1);
+ mlx5_disable_lag(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
+ }
+ mlx5_dev_list_unlock();
+}
-unlock:
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev(dev);
+ ldev->mode_changes_in_progress--;
mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, 0);
}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -39,6 +39,7 @@ struct lag_tracker {
*/
struct mlx5_lag {
u8 flags;
+ int mode_changes_in_progress;
bool shared_fdb;
u8 v2p_map[MLX5_MAX_PORTS];
struct kref ref;
@@ -1179,6 +1179,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_ec;
}
+ mlx5_lag_add_mdev(dev);
err = mlx5_sriov_attach(dev);
if (err) {
mlx5_core_err(dev, "sriov init failed %d\n", err);
@@ -1186,11 +1187,11 @@ static int mlx5_load(struct mlx5_core_dev *dev)
}
mlx5_sf_dev_table_create(dev);
- mlx5_lag_add_mdev(dev);
return 0;
err_sriov:
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
err_ec:
mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1223,9 @@ static int mlx5_load(struct mlx5_core_dev *dev)
static void mlx5_unload(struct mlx5_core_dev *dev)
{
- mlx5_lag_remove_mdev(dev);
mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);