[net-next,08/12] net/mlx5e: Support IPsec packet offload for TX in switchdev mode

Message ID: daffa9160f0fb3295309d601f67b7364d7768f07.1689064922.git.leonro@nvidia.com (mailing list archive)
State: Changes Requested
Delegated to: Netdev Maintainers
Series: mlx5 IPsec packet offload support in eswitch mode

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1345 this patch: 1345
netdev/cc_maintainers warning 4 maintainers not CCed: raeds@nvidia.com borisp@nvidia.com linux-rdma@vger.kernel.org steffen.klassert@secunet.com
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1368 this patch: 1368
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Leon Romanovsky July 11, 2023, 9:29 a.m. UTC
From: Jianbo Liu <jianbol@nvidia.com>

IPsec encryption is the last step on the TX datapath, so add a new
prio for IPsec offload in the FDB, placed just below the slow path
prio and above the per-vport prio.
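
For reference, the new prio slots in just after the slow path in the
FDB prio enum (include/linux/mlx5/fs.h), as changed below:

	enum {
		...
		FDB_SLOW_PATH,
		FDB_CRYPTO_EGRESS,	/* new: IPsec TX tables live here */
		FDB_PER_VPORT,
	};
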
Three table levels are added for TX. The first level holds the ip xfrm
policy rules. The SA table, for ip xfrm state, is created at the second
level. The status table is created last, to count the number of
encrypted packets.
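
A sketch of the resulting TX pipeline inside the new prio (hit and miss
behavior as wired up in this patch):

	policy FT (MLX5_ESW_IPSEC_TX_POL_FT_LEVEL)
	    | policy hit: jump to the SA table
	    v
	SA FT (MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL)
	    | SA hit: ESP encrypt; miss: forward to the uplink vport
	    v
	status FT (MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL): count the packets
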
The rules that forward packets to the uplink are changed to forward
them to the IPsec TX tables first. The original rules are restored once
those tables are destroyed, which happens as soon as the last reference
to them is dropped, just as in legacy mode. Support for the slow path
is added here by refreshing the uplink's channels, but the handling for
the TC fast path, which is more complicated, will be added later.
Finally, metadata reg c4 is used instead of reg c0 to match the reqid,
as reg c0 already carries eswitch metadata in switchdev mode.
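
To make the reqid handoff concrete, a minimal sketch of the two halves,
following the helpers touched below (the policy rule stamps the reqid
into reg c4 with a modify-header set action, and the SA rule matches on
it):

	/* policy side, setup_modify_header(): write the reqid to reg c4 */
	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, action, field,
		 MLX5_ACTION_IN_FIELD_METADATA_REG_C_4);
	MLX5_SET(set_action_in, action, data, reqid);

	/* SA side, setup_fte_reg_c4(): match the stamped reqid */
	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
			 misc_parameters_2.metadata_reg_c_4);
	MLX5_SET(fte_match_param, spec->match_value,
		 misc_parameters_2.metadata_reg_c_4, reqid);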

Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c    | 70 ++++++++++++++++---
 .../mellanox/mlx5/core/esw/ipsec_fs.c         | 16 +++++
 .../mellanox/mlx5/core/esw/ipsec_fs.h         |  5 ++
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  1 +
 .../mellanox/mlx5/core/eswitch_offloads.c     | 11 +++
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |  6 ++
 include/linux/mlx5/fs.h                       |  1 +
 7 files changed, 101 insertions(+), 9 deletions(-)

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index f89174dbed92..a2f3ba811af8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -10,6 +10,7 @@ 
 #include "lib/ipsec_fs_roce.h"
 #include "lib/fs_chains.h"
 #include "esw/ipsec_fs.h"
+#include "en_rep.h"
 
 #define NUM_IPSEC_FTE BIT(15)
 #define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16
@@ -23,6 +24,7 @@  struct mlx5e_ipsec_fc {
 struct mlx5e_ipsec_tx {
 	struct mlx5e_ipsec_ft ft;
 	struct mlx5e_ipsec_miss pol;
+	struct mlx5e_ipsec_miss sa;
 	struct mlx5e_ipsec_rule status;
 	struct mlx5_flow_namespace *ns;
 	struct mlx5e_ipsec_fc *fc;
@@ -550,7 +552,7 @@  static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_
 }
 
 /* IPsec TX flow steering */
-static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
+static void tx_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx,
 		       struct mlx5_ipsec_fs *roce)
 {
 	mlx5_ipsec_fs_roce_tx_destroy(roce);
@@ -562,9 +564,13 @@  static void tx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx,
 		mlx5_destroy_flow_table(tx->ft.pol);
 	}
 
+	if (tx == ipsec->tx_esw) {
+		mlx5_del_flow_rules(tx->sa.rule);
+		mlx5_destroy_flow_group(tx->sa.group);
+	}
 	mlx5_destroy_flow_table(tx->ft.sa);
 	if (tx->allow_tunnel_mode)
-		mlx5_eswitch_unblock_encap(mdev);
+		mlx5_eswitch_unblock_encap(ipsec->mdev);
 	mlx5_del_flow_rules(tx->status.rule);
 	mlx5_destroy_flow_table(tx->ft.status);
 }
@@ -573,6 +579,11 @@  static void ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
 				     struct mlx5e_ipsec_tx *tx,
 				     struct mlx5e_ipsec_tx_create_attr *attr)
 {
+	if (tx == ipsec->tx_esw) {
+		mlx5_esw_ipsec_tx_create_attr_set(ipsec, attr);
+		return;
+	}
+
 	attr->prio = 0;
 	attr->pol_level = 0;
 	attr->sa_level = 1;
@@ -611,6 +622,15 @@  static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx,
 	}
 	tx->ft.sa = ft;
 
+	if (tx == ipsec->tx_esw) {
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+		dest.vport.num = MLX5_VPORT_UPLINK;
+		err = ipsec_miss_create(mdev, tx->ft.sa, &tx->sa, &dest);
+		if (err)
+			goto err_sa_miss;
+		memset(&dest, 0, sizeof(dest));
+	}
+
 	if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) {
 		tx->chains = ipsec_chains_create(
 			mdev, tx->ft.sa, attr.chains_ns, attr.prio, attr.pol_level,
@@ -652,6 +672,11 @@  static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx,
 		mlx5_destroy_flow_table(tx->ft.pol);
 	}
 err_pol_ft:
+	if (tx == ipsec->tx_esw) {
+		mlx5_del_flow_rules(tx->sa.rule);
+		mlx5_destroy_flow_group(tx->sa.group);
+	}
+err_sa_miss:
 	mlx5_destroy_flow_table(tx->ft.sa);
 err_sa_ft:
 	if (tx->allow_tunnel_mode)
@@ -662,6 +687,25 @@  static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx,
 	return err;
 }
 
+static void ipsec_esw_tx_ft_policy_set(struct mlx5_core_dev *mdev,
+				       struct mlx5_flow_table *ft)
+{
+#ifdef CONFIG_MLX5_ESWITCH
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5e_priv *priv;
+
+	esw->offloads.ft_ipsec_tx_pol = ft;
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	priv = netdev_priv(uplink_rpriv->netdev);
+	if (!priv->channels.num)
+		return;
+
+	mlx5e_rep_deactivate_channels(priv);
+	mlx5e_rep_activate_channels(priv);
+#endif
+}
+
 static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 		  struct mlx5e_ipsec_tx *tx)
 {
@@ -674,6 +718,9 @@  static int tx_get(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec,
 	if (err)
 		return err;
 
+	if (tx == ipsec->tx_esw)
+		ipsec_esw_tx_ft_policy_set(mdev, tx->ft.pol);
+
 skip:
 	tx->ft.refcnt++;
 	return 0;
@@ -684,7 +731,10 @@  static void tx_put(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx)
 	if (--tx->ft.refcnt)
 		return;
 
-	tx_destroy(ipsec->mdev, tx, ipsec->roce);
+	if (tx == ipsec->tx_esw)
+		ipsec_esw_tx_ft_policy_set(ipsec->mdev, NULL);
+
+	tx_destroy(ipsec, tx, ipsec->roce);
 }
 
 static struct mlx5_flow_table *tx_ft_get_policy(struct mlx5_core_dev *mdev,
@@ -842,15 +892,15 @@  static void setup_fte_reg_a(struct mlx5_flow_spec *spec)
 		 misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_IPSEC);
 }
 
-static void setup_fte_reg_c0(struct mlx5_flow_spec *spec, u32 reqid)
+static void setup_fte_reg_c4(struct mlx5_flow_spec *spec, u32 reqid)
 {
 	/* Pass policy check before choosing this SA */
 	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
 
-	MLX5_SET(fte_match_param, spec->match_criteria,
-		 misc_parameters_2.metadata_reg_c_0, reqid);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+			 misc_parameters_2.metadata_reg_c_4);
 	MLX5_SET(fte_match_param, spec->match_value,
-		 misc_parameters_2.metadata_reg_c_0, reqid);
+		 misc_parameters_2.metadata_reg_c_4, reqid);
 }
 
 static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upspec *upspec)
@@ -902,7 +952,7 @@  static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8
 		break;
 	case XFRM_DEV_OFFLOAD_OUT:
 		MLX5_SET(set_action_in, action, field,
-			 MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+			 MLX5_ACTION_IN_FIELD_METADATA_REG_C_4);
 		break;
 	default:
 		return -EINVAL;
@@ -1235,7 +1285,7 @@  static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 		break;
 	case XFRM_DEV_OFFLOAD_PACKET:
 		if (attrs->reqid)
-			setup_fte_reg_c0(spec, attrs->reqid);
+			setup_fte_reg_c4(spec, attrs->reqid);
 		err = setup_pkt_reformat(ipsec, attrs, &flow_act);
 		if (err)
 			goto err_pkt_reformat;
@@ -1346,6 +1396,8 @@  static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry)
 	}
 
 	flow_act.flags |= FLOW_ACT_NO_APPEND;
+	if (tx == ipsec->tx_esw && tx->chains)
+		flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
 	dest[dstn].ft = tx->ft.sa;
 	dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dstn++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
index 0675587c1a79..e60cd3dc1b13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c
@@ -12,6 +12,12 @@  enum {
 	MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL,
 };
 
+enum {
+	MLX5_ESW_IPSEC_TX_POL_FT_LEVEL,
+	MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL,
+	MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL,
+};
+
 static void esw_ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec,
 					     struct mlx5e_ipsec_rx *rx)
 {
@@ -251,3 +257,13 @@  int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id,
 
 	return 0;
 }
+
+void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
+				       struct mlx5e_ipsec_tx_create_attr *attr)
+{
+	attr->prio = FDB_CRYPTO_EGRESS;
+	attr->pol_level = MLX5_ESW_IPSEC_TX_POL_FT_LEVEL;
+	attr->sa_level = MLX5_ESW_IPSEC_TX_ESP_FT_LEVEL;
+	attr->cnt_level = MLX5_ESW_IPSEC_TX_ESP_FT_CNT_LEVEL;
+	attr->chains_ns = MLX5_FLOW_NAMESPACE_FDB;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h
index 44df34032d1e..275684f99ed3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h
@@ -22,6 +22,8 @@  int mlx5_esw_ipsec_rx_setup_modify_header(struct mlx5e_ipsec_sa_entry *sa_entry,
 void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry);
 int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id,
 					  u32 *ipsec_obj_id);
+void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
+				       struct mlx5e_ipsec_tx_create_attr *attr);
 #else
 static inline void mlx5_esw_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec,
 						    struct mlx5e_ipsec_rx *rx) {}
@@ -55,5 +57,8 @@  static inline int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv,
 {
 	return -EINVAL;
 }
+
+static inline void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec,
+						     struct mlx5e_ipsec_tx_create_attr *attr) {}
 #endif /* CONFIG_MLX5_ESWITCH */
 #endif /* __MLX5_ESW_IPSEC_FS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ae0dc8a3060d..1070cfa72678 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -254,6 +254,7 @@  struct mlx5_esw_offload {
 	struct mlx5_flow_group *vport_rx_group;
 	struct mlx5_flow_group *vport_rx_drop_group;
 	struct mlx5_flow_handle *vport_rx_drop_rule;
+	struct mlx5_flow_table *ft_ipsec_tx_pol;
 	struct xarray vport_reps;
 	struct list_head peer_flows[MLX5_MAX_PORTS];
 	struct mutex peer_mutex;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bdfe609cc9ec..e8bf18098203 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -884,6 +884,17 @@  mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
 	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
+	if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) {
+		dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
+		flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	} else {
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+		dest.vport.num = rep->vport;
+		dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
+		dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+	}
+
 	if (MLX5_CAP_ESW_FLOWTABLE(on_esw->dev, flow_source) &&
 	    rep->vport == MLX5_VPORT_UPLINK)
 		spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8ae1854d6b73..830ff8480fe1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -3015,6 +3015,12 @@  static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 		goto out_err;
 	}
 
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_CRYPTO_EGRESS, 3);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
+		goto out_err;
+	}
+
 	/* We put this priority last, knowing that nothing will get here
 	 * unless explicitly forwarded to. This is possible because the
 	 * slow path tables have catch all rules and nothing gets passed
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 6b1fa94f69c8..c302ec34255b 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -115,6 +115,7 @@  enum {
 	FDB_TC_MISS,
 	FDB_BR_OFFLOAD,
 	FDB_SLOW_PATH,
+	FDB_CRYPTO_EGRESS,
 	FDB_PER_VPORT,
 };