@@ -155,7 +155,9 @@ struct mlx5_esw_sched_node {
enum sched_node_type type;
/* The eswitch this node belongs to. */
struct mlx5_eswitch *esw;
- /* The children nodes of this node, empty list for leaf nodes. */
+ /* The children nodes of this node, empty list for leaf nodes.
+ * Can be from multiple E-Switches.
+ */
struct list_head children;
/* Valid only if this node is associated with a vport. */
struct mlx5_vport *vport;
@@ -471,6 +473,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_core_dev *dev = vport_node->esw->dev;
+ struct mlx5_vport *vport = vport_node->vport;
void *attr;
if (!mlx5_qos_element_type_supported(dev,
@@ -481,7 +484,13 @@ static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
- MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
+ MLX5_SET(vport_element, attr, vport_number, vport->vport);
+ if (vport->dev != dev) {
+ /* The port is assigned to a node on another eswitch. */
+ MLX5_SET(vport_element, attr, eswitch_owner_vhca_id_valid, true);
+ MLX5_SET(vport_element, attr, eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(vport->dev, vhca_id));
+ }
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_node->parent->ix);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate);
@@ -494,6 +503,7 @@ static int esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vpo
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
+ struct mlx5_vport *vport = vport_tc_node->vport;
void *attr;
if (!mlx5_qos_element_type_supported(dev,
@@ -504,9 +514,15 @@ static int esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vpo
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
- MLX5_SET(vport_tc_element, attr, vport_number, vport_tc_node->vport->vport);
+ MLX5_SET(vport_tc_element, attr, vport_number, vport->vport);
MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id, rate_limit_elem_ix);
+ if (vport->dev != dev) {
+ /* The port is assigned to a node on another eswitch. */
+ MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id_valid, true);
+ MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(vport->dev, vhca_id));
+ }
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_tc_node->parent->ix);
MLX5_SET(scheduling_context, sched_ctx, bw_share, vport_tc_node->bw_share);
@@ -947,7 +963,6 @@ static int esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_typ
NL_SET_ERR_MSG_MOD(extack, "Setting up TC Arbiter for a vport is not supported.");
return -EOPNOTSUPP;
}
-
esw_assert_qos_lock_held(vport->dev->priv.eswitch);
if (type == SCHED_NODE_TYPE_RATE_LIMITER)
@@ -1182,6 +1197,26 @@ static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
return 0;
}
+static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, u32 *tc_bw)
+{
+ int i, num_tcs = esw_qos_num_tcs(esw->dev);
+
+ for (i = num_tcs; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ if (tc_bw[i])
+ return false;
+ }
+
+ return true;
+}
+
+static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, u32 *tc_bw)
+{
+ struct mlx5_eswitch *esw = vport->qos.sched_node ?
+ vport->qos.sched_node->parent->esw : vport->dev->priv.eswitch;
+
+ return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
+}
+
static int esw_qos_vport_update(struct mlx5_vport *vport, enum sched_node_type type,
struct mlx5_esw_sched_node *parent,
struct netlink_ext_ack *extack)
@@ -1200,8 +1235,14 @@ static int esw_qos_vport_update(struct mlx5_vport *vport, enum sched_node_type t
if (err)
return err;
- if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type)
+ if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
esw_qos_tc_arbiter_get_bw_shares(vport->qos.sched_node, curr_tc_bw);
+ if (!esw_qos_validate_unsupported_tc_bw(parent->esw, curr_tc_bw)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported traffic classes on the new device");
+ return -EOPNOTSUPP;
+ }
+ }
esw_qos_vport_disable(vport, extack);
@@ -1519,26 +1560,6 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
return 0;
}
-static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, u32 *tc_bw)
-{
- int i, num_tcs = esw_qos_num_tcs(esw->dev);
-
- for (i = num_tcs; i < IEEE_8021QAZ_MAX_TCS; i++) {
- if (tc_bw[i])
- return false;
- }
-
- return true;
-}
-
-static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, u32 *tc_bw)
-{
- struct mlx5_eswitch *esw = vport->qos.sched_node ?
- vport->qos.sched_node->parent->esw : vport->dev->priv.eswitch;
-
- return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
-}
-
static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
{
int i;
@@ -1553,10 +1574,16 @@ static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
- if (esw->qos.domain)
- return 0; /* Nothing to change. */
+ bool use_shared_domain = esw->mode == MLX5_ESWITCH_OFFLOADS &&
+ MLX5_CAP_QOS(esw->dev, esw_cross_esw_sched);
- return esw_qos_domain_init(esw, false);
+ if (esw->qos.domain) {
+ if (esw->qos.domain->shared == use_shared_domain)
+ return 0; /* Nothing to change. */
+ esw_qos_domain_release(esw);
+ }
+
+ return esw_qos_domain_init(esw, use_shared_domain);
}
void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
@@ -1760,16 +1787,40 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
return 0;
}
-int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
- struct netlink_ext_ack *extack)
+static int mlx5_esw_validate_cross_esw_scheduling(struct mlx5_eswitch *esw,
+ struct mlx5_esw_sched_node *parent,
+ struct netlink_ext_ack *extack)
{
- struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
- int err = 0;
+ if (!parent || esw == parent->esw)
+ return 0;
- if (parent && parent->esw != esw) {
+ if (!MLX5_CAP_QOS(esw->dev, esw_cross_esw_sched)) {
NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
return -EOPNOTSUPP;
}
+ if (esw->qos.domain != parent->esw->qos.domain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot add vport to a parent belonging to a different qos domain");
+ return -EOPNOTSUPP;
+ }
+ if (!mlx5_lag_is_active(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cross E-Switch scheduling requires LAG to be activated");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+ int err;
+
+ err = mlx5_esw_validate_cross_esw_scheduling(esw, parent, extack);
+ if (err)
+ return err;
esw_qos_lock(esw);
if (!vport->qos.sched_node && parent) {
@@ -1095,7 +1095,9 @@ struct mlx5_ifc_qos_cap_bits {
u8 log_esw_max_sched_depth[0x4];
u8 reserved_at_10[0x10];
- u8 reserved_at_20[0xb];
+ u8 reserved_at_20[0x9];
+ u8 esw_cross_esw_sched[0x1];
+ u8 reserved_at_2a[0x1];
u8 log_max_qos_nic_queue_group[0x5];
u8 reserved_at_30[0x10];
@@ -4130,13 +4132,16 @@ struct mlx5_ifc_tsar_element_bits {
};
struct mlx5_ifc_vport_element_bits {
- u8 reserved_at_0[0x10];
+ u8 reserved_at_0[0x4];
+ u8 eswitch_owner_vhca_id_valid[0x1];
+ u8 eswitch_owner_vhca_id[0xb];
u8 vport_number[0x10];
};
struct mlx5_ifc_vport_tc_element_bits {
u8 traffic_class[0x4];
- u8 reserved_at_4[0xc];
+ u8 eswitch_owner_vhca_id_valid[0x1];
+ u8 eswitch_owner_vhca_id[0xb];
u8 vport_number[0x10];
};
Up to now, rate groups could only contain vports from the same E-Switch.
This patch relaxes that restriction if the device supports it
(HCA_CAP.esw_cross_esw_sched == true) and the right conditions are met:
- Link Aggregation (LAG) is enabled.
- The E-Switches use the same qos domain.

This also enables the use of the previously added shared esw qos domains.

Issue: 3645895
Change-Id: I282f0ecad258fa2dbe6a49e88cc7bc9a06ccfcce
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 117 +++++++++++++-----
 include/linux/mlx5/mlx5_ifc.h                 |  11 +-
 2 files changed, 92 insertions(+), 36 deletions(-)