diff mbox series

[net-next,7/8] mlxsw: Add support for VxLAN with IPv6 underlay

Message ID 20211214142551.606542-8-idosch@nvidia.com (mailing list archive)
State Accepted
Commit 06c08f869c0eda8a466288b8ec32bc217d22a8fb
Delegated to: Netdev Maintainers
Headers show
Series mlxsw: Add support for VxLAN with IPv6 underlay | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 94 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ido Schimmel Dec. 14, 2021, 2:25 p.m. UTC
From: Amit Cohen <amcohen@nvidia.com>

Currently, the mlxsw driver supports VxLAN with an IPv4 underlay only.
Add support for an IPv6 underlay.

The main differences are:

* Learning is not supported for IPv6 FDB entries; use static entries and
  do not allow the 'learning' flag for IPv6 VxLAN.

* IPv6 addresses for FDB entries should be saved as part of KVDL.
  Use the new API to allocate and release entries for IPv6 addresses.

* Spectrum ASICs do not fill in the UDP checksum, whereas in software
  IPv6 UDP packets with a zero checksum are dropped.
  Require the flags that allow the VxLAN device to both generate UDP
  packets with a zero checksum and receive them.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 .../ethernet/mellanox/mlxsw/spectrum_nve.c    | 14 +++-
 .../mellanox/mlxsw/spectrum_nve_vxlan.c       | 66 +++++++++++++--
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++
 .../mellanox/mlxsw/spectrum_switchdev.c       | 84 +++++++++++++++++++
 4 files changed, 168 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
index dfe070434cbe..d2b57a045aa4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -130,15 +130,25 @@  mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
 				      struct mlxsw_sp_nve_mc_entry *mc_entry,
 				      const union mlxsw_sp_l3addr *addr)
 {
-	WARN_ON(1);
+	u32 kvdl_index;
+	int err;
+
+	err = mlxsw_sp_ipv6_addr_kvdl_index_get(mc_record->mlxsw_sp,
+						&addr->addr6, &kvdl_index);
+	if (err)
+		return err;
 
-	return -EINVAL;
+	mc_entry->ipv6_entry.addr6 = addr->addr6;
+	mc_entry->ipv6_entry.addr6_kvdl_index = kvdl_index;
+	return 0;
 }
 
 static void
 mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
 				      const struct mlxsw_sp_nve_mc_entry *mc_entry)
 {
+	mlxsw_sp_ipv6_addr_put(mc_record->mlxsw_sp,
+			       &mc_entry->ipv6_entry.addr6);
 }
 
 static void
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index 766a20e05393..d309b77a0194 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -12,6 +12,9 @@ 
 
 #define MLXSW_SP_NVE_VXLAN_IPV4_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \
 						 VXLAN_F_LEARN)
+#define MLXSW_SP_NVE_VXLAN_IPV6_SUPPORTED_FLAGS (VXLAN_F_IPV6 | \
+						 VXLAN_F_UDP_ZERO_CSUM6_TX | \
+						 VXLAN_F_UDP_ZERO_CSUM6_RX)
 
 static bool mlxsw_sp_nve_vxlan_ipv4_flags_check(const struct vxlan_config *cfg,
 						struct netlink_ext_ack *extack)
@@ -29,6 +32,27 @@  static bool mlxsw_sp_nve_vxlan_ipv4_flags_check(const struct vxlan_config *cfg,
 	return true;
 }
 
+static bool mlxsw_sp_nve_vxlan_ipv6_flags_check(const struct vxlan_config *cfg,
+						struct netlink_ext_ack *extack)
+{
+	if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Zero UDP checksum must be allowed for TX");
+		return false;
+	}
+
+	if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Zero UDP checksum must be allowed for RX");
+		return false;
+	}
+
+	if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_IPV6_SUPPORTED_FLAGS) {
+		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
+		return false;
+	}
+
+	return true;
+}
+
 static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
 					   const struct mlxsw_sp_nve_params *params,
 					   struct netlink_ext_ack *extack)
@@ -36,11 +60,6 @@  static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
 	struct vxlan_dev *vxlan = netdev_priv(params->dev);
 	struct vxlan_config *cfg = &vxlan->cfg;
 
-	if (cfg->saddr.sa.sa_family != AF_INET) {
-		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
-		return false;
-	}
-
 	if (vxlan_addr_multicast(&cfg->remote_ip)) {
 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
 		return false;
@@ -76,6 +95,10 @@  static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
 		if (!mlxsw_sp_nve_vxlan_ipv4_flags_check(cfg, extack))
 			return false;
 		break;
+	case AF_INET6:
+		if (!mlxsw_sp_nve_vxlan_ipv6_flags_check(cfg, extack))
+			return false;
+		break;
 	}
 
 	if (cfg->ttl == 0) {
@@ -103,6 +126,22 @@  static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
 	return mlxsw_sp_nve_vxlan_can_offload(nve, params, extack);
 }
 
+static void
+mlxsw_sp_nve_vxlan_ul_proto_sip_config(const struct vxlan_config *cfg,
+				       struct mlxsw_sp_nve_config *config)
+{
+	switch (cfg->saddr.sa.sa_family) {
+	case AF_INET:
+		config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
+		config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		config->ul_proto = MLXSW_SP_L3_PROTO_IPV6;
+		config->ul_sip.addr6 = cfg->saddr.sin6.sin6_addr;
+		break;
+	}
+}
+
 static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
 				      const struct mlxsw_sp_nve_params *params,
 				      struct mlxsw_sp_nve_config *config)
@@ -115,8 +154,7 @@  static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
 	config->flowlabel = cfg->label;
 	config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0;
 	config->ul_tb_id = RT_TABLE_MAIN;
-	config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
-	config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
+	mlxsw_sp_nve_vxlan_ul_proto_sip_config(cfg, config);
 	config->udp_dport = cfg->dst_port;
 }
 
@@ -124,6 +162,7 @@  static void
 mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
 				  const struct mlxsw_sp_nve_config *config)
 {
+	struct in6_addr addr6;
 	u8 udp_sport;
 
 	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
@@ -135,7 +174,18 @@  mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
 	get_random_bytes(&udp_sport, sizeof(udp_sport));
 	udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
 	mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
-	mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
+
+	switch (config->ul_proto) {
+	case MLXSW_SP_L3_PROTO_IPV4:
+		mlxsw_reg_tngcr_usipv4_set(tngcr_pl,
+					   be32_to_cpu(config->ul_sip.addr4));
+		break;
+	case MLXSW_SP_L3_PROTO_IPV6:
+		addr6 = config->ul_sip.addr6;
+		mlxsw_reg_tngcr_usipv6_memcpy_to(tngcr_pl,
+						 (const char *)&addr6);
+		break;
+	}
 }
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 764731eae2cd..d40762cfc453 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1307,6 +1307,10 @@  mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
 		addr_prefix_len = 32;
 		break;
 	case MLXSW_SP_L3_PROTO_IPV6:
+		addrp = &addr->addr6;
+		addr_len = 16;
+		addr_prefix_len = 128;
+		break;
 	default:
 		WARN_ON(1);
 		return NULL;
@@ -7002,6 +7006,8 @@  mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi;
 	union mlxsw_sp_l3addr dip = { .addr6 = rt->fib6_dst.addr };
+	u32 tb_id = mlxsw_sp_fix_tb_id(rt->fib6_table->tb6_id);
+	struct mlxsw_sp_router *router = mlxsw_sp->router;
 	int ifindex = nhgi->nexthops[0].ifindex;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 
@@ -7015,6 +7021,14 @@  mlxsw_sp_fib6_entry_type_set_local(struct mlxsw_sp *mlxsw_sp,
 		return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, fib_entry,
 						     ipip_entry);
 	}
+	if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
+					 MLXSW_SP_L3_PROTO_IPV6, &dip)) {
+		u32 tunnel_index;
+
+		tunnel_index = router->nve_decap_config.tunnel_index;
+		fib_entry->decap.tunnel_index = tunnel_index;
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+	}
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 53473647870d..65c1724c63b0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1321,6 +1321,88 @@  mlxsw_sp_port_fdb_tun_uc_op4(struct mlxsw_sp *mlxsw_sp, bool dynamic,
 	return err;
 }
 
+static int mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(struct mlxsw_sp *mlxsw_sp,
+						  const char *mac, u16 fid,
+						  u32 kvdl_index, bool adding)
+{
+	char *sfd_pl;
+	u8 num_rec;
+	int err;
+
+	sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+	if (!sfd_pl)
+		return -ENOMEM;
+
+	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+	mlxsw_reg_sfd_uc_tunnel_pack6(sfd_pl, 0, mac, fid,
+				      MLXSW_REG_SFD_REC_ACTION_NOP, kvdl_index);
+	num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+	if (err)
+		goto out;
+
+	if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+		err = -EBUSY;
+
+out:
+	kfree(sfd_pl);
+	return err;
+}
+
+static int mlxsw_sp_port_fdb_tun_uc_op6_add(struct mlxsw_sp *mlxsw_sp,
+					    const char *mac, u16 fid,
+					    const struct in6_addr *addr)
+{
+	u32 kvdl_index;
+	int err;
+
+	err = mlxsw_sp_nve_ipv6_addr_kvdl_set(mlxsw_sp, addr, &kvdl_index);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(mlxsw_sp, mac, fid,
+						     kvdl_index, true);
+	if (err)
+		goto err_sfd_write;
+
+	err = mlxsw_sp_nve_ipv6_addr_map_replace(mlxsw_sp, mac, fid, addr);
+	if (err)
+		/* Replace can fail only for creating new mapping, so removing
+		 * the FDB entry in the error path is OK.
+		 */
+		goto err_addr_replace;
+
+	return 0;
+
+err_addr_replace:
+	mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(mlxsw_sp, mac, fid, kvdl_index,
+					       false);
+err_sfd_write:
+	mlxsw_sp_nve_ipv6_addr_kvdl_unset(mlxsw_sp, addr);
+	return err;
+}
+
+static void mlxsw_sp_port_fdb_tun_uc_op6_del(struct mlxsw_sp *mlxsw_sp,
+					     const char *mac, u16 fid,
+					     const struct in6_addr *addr)
+{
+	mlxsw_sp_nve_ipv6_addr_map_del(mlxsw_sp, mac, fid);
+	mlxsw_sp_port_fdb_tun_uc_op6_sfd_write(mlxsw_sp, mac, fid, 0, false);
+	mlxsw_sp_nve_ipv6_addr_kvdl_unset(mlxsw_sp, addr);
+}
+
+static int
+mlxsw_sp_port_fdb_tun_uc_op6(struct mlxsw_sp *mlxsw_sp, const char *mac,
+			     u16 fid, const struct in6_addr *addr, bool adding)
+{
+	if (adding)
+		return mlxsw_sp_port_fdb_tun_uc_op6_add(mlxsw_sp, mac, fid,
+							addr);
+
+	mlxsw_sp_port_fdb_tun_uc_op6_del(mlxsw_sp, mac, fid, addr);
+	return 0;
+}
+
 static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp,
 					  const char *mac, u16 fid,
 					  enum mlxsw_sp_l3proto proto,
@@ -1332,6 +1414,8 @@  static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp,
 		return mlxsw_sp_port_fdb_tun_uc_op4(mlxsw_sp, dynamic, mac, fid,
 						    addr->addr4, adding);
 	case MLXSW_SP_L3_PROTO_IPV6:
+		return mlxsw_sp_port_fdb_tun_uc_op6(mlxsw_sp, mac, fid,
+						    &addr->addr6, adding);
 	default:
 		WARN_ON(1);
 		return -EOPNOTSUPP;