diff mbox series

[RFC,bpf-next,23/23] mlx5: implement RX checksum XDP hint

Message ID 20230824192703.712881-24-larysa.zaremba@intel.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series XDP metadata via kfuncs for ice + mlx5 | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 pending Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 pending Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 pending Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1334 this patch: 1334
netdev/cc_maintainers warning 10 maintainers not CCed: hawk@kernel.org saeedm@nvidia.com tariqt@nvidia.com davem@davemloft.net pabeni@redhat.com leon@kernel.org maxtram95@gmail.com dtatulea@nvidia.com edumazet@google.com linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 1353 this patch: 1353
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1357 this patch: 1357
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Larysa Zaremba Aug. 24, 2023, 7:27 p.m. UTC
Implement .xmo_rx_csum() callback to expose checksum information
to XDP code.

This version contains a lot of logic duplicated from the skb path, because
refactoring would be much more complex than the implementation itself: the
checksum code is too coupled with the skb concept.

Intended logic differences from the skb path:
- when the checksum does not cover the whole packet, no fixups are performed;
  such a packet is treated as one without a complete checksum. This keeps
  the patch from ballooning with hints-unrelated code.
- with hints API, we can now inform about both complete and validated
  checksum statuses, that is why XDP_CHECKSUM_VERIFIED is ORed to the
  status. I hope this represents HW logic well.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  10 ++
 .../net/ethernet/mellanox/mlx5/core/en/xdp.c  | 100 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  12 +--
 include/linux/mlx5/device.h                   |   2 +-
 4 files changed, 112 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 879d698b6119..9467a0dea6ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -506,4 +506,14 @@  static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int
 
 	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
 }
+
+/* IPv4 protocol field if @proto is ETH_P_IP, otherwise the IPv6 nexthdr. */
+static inline u8 get_ip_proto(void *data, int network_depth, __be16 proto)
+{
+	if (proto == htons(ETH_P_IP))
+		return ((struct iphdr *)(data + network_depth))->protocol;
+	return ((struct ipv6hdr *)(data + network_depth))->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index e8319ab0fa85..e08b2ad56442 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -270,10 +270,110 @@  static int mlx5e_xdp_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tci,
 	return 0;
 }
 
+/* Skip VLAN tags; return the innermost ethertype, or 0 if a tag is truncated
+ * or nesting is too deep. *network_offset is written only on success.
+ */
+static __be16 xdp_buff_last_ethertype(const struct xdp_buff *xdp,
+				      int *network_offset)
+{
+	const struct ethhdr *eth = xdp->data;
+	struct vlan_hdr *vhdr = xdp->data + ETH_HLEN;
+	u8 tags_left = VLAN_MAX_DEPTH;
+	__be16 ethertype;
+
+	for (ethertype = eth->h_proto; eth_type_vlan(ethertype); vhdr++) {
+		if ((void *)(vhdr + 1) > xdp->data_end || !--tags_left)
+			return 0;
+		ethertype = vhdr->h_vlan_encapsulated_proto;
+	}
+	*network_offset = (void *)vhdr - xdp->data;
+	return ethertype;
+}
+
+/* True when the CQE checksum cannot be reported as complete without a fixup:
+ * the L3 header does not directly follow the Ethernet header, @proto is not
+ * IPv4/IPv6, or the buffer is longer than the IP header says the packet is.
+ */
+static bool xdp_csum_needs_fixup(const struct xdp_buff *xdp, int network_depth,
+				 __be16 proto)
+{
+	void *l3_hdr = xdp->data + network_depth;
+	int l3_len;
+
+	if (network_depth > ETH_HLEN)
+		return true;
+
+	if (proto == htons(ETH_P_IP)) {
+		l3_len = ntohs(((struct iphdr *)l3_hdr)->tot_len);
+	} else if (proto == htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6 = l3_hdr;
+
+		l3_len = sizeof(*ip6) + ntohs(ip6->payload_len);
+	} else {
+		return true;
+	}
+
+	/* Common case: the buffer is no longer than the length computed from
+	 * the IP header, so no fixup is needed.
+	 */
+	return unlikely(network_depth + l3_len < xdp->data_end - xdp->data);
+}
+
+/* .xmo_rx_csum callback: derive the XDP checksum status from the RX CQE.
+ * Returns 0 once a status bit has been stored, -ENODATA otherwise.
+ */
+static int mlx5e_xdp_rx_csum(const struct xdp_md *ctx,
+			     enum xdp_csum_status *csum_status,
+			     __wsum *csum)
+{
+	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
+	const struct mlx5_cqe64 *cqe = _ctx->cqe;
+	const struct mlx5e_rq *rq = _ctx->rq;
+	__be16 last_ethertype;
+	int network_offset;
+
+	if (be32_to_cpu(cqe->srqn) >> 24) {	/* LRO segment count */
+		*csum_status = XDP_CHECKSUM_VERIFIED;
+		return 0;
+	}
+
+	/* Exit path ORs into and tests *csum_status: start from zero. */
+	*csum_status = 0;
+	/* Frame length comes from the xdp_buff pointers, not from xdp_md. */
+	if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
+	    get_cqe_tls_offload(cqe) ||
+	    short_frame(_ctx->xdp.data_end - _ctx->xdp.data))
+		goto csum_unnecessary;
+
+	last_ethertype = xdp_buff_last_ethertype(&_ctx->xdp, &network_offset);
+	if (last_ethertype != htons(ETH_P_IP) && last_ethertype != htons(ETH_P_IPV6))
+		goto csum_unnecessary;
+	if (unlikely(get_ip_proto(_ctx->xdp.data, network_offset,
+				  last_ethertype) == IPPROTO_SCTP))
+		goto csum_unnecessary;
+
+	*csum_status = XDP_CHECKSUM_COMPLETE;
+	*csum = csum_unfold((__force __sum16)cqe->check_sum);
+
+	if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
+		goto csum_unnecessary;
+	if (unlikely(xdp_csum_needs_fixup(&_ctx->xdp, network_offset,
+					  last_ethertype)))
+		*csum_status = 0;
+
+csum_unnecessary:
+	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
+		   (cqe->hds_ip_ext & CQE_L4_OK)))
+		*csum_status |= XDP_CHECKSUM_VERIFIED;
+
+	return *csum_status ? 0 : -ENODATA;
+}
+
 const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
 	.xmo_rx_timestamp		= mlx5e_xdp_rx_timestamp,
 	.xmo_rx_hash			= mlx5e_xdp_rx_hash,
 	.xmo_rx_vlan_tag		= mlx5e_xdp_rx_vlan_tag,
+	.xmo_rx_csum			= mlx5e_xdp_rx_csum,
 };
 
 /* returns true if packet was consumed by xdp */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3fd11b0761e0..c303ab8b928c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1374,16 +1374,6 @@  static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
 	rq->stats->ecn_mark += !!rc;
 }
 
-static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
-{
-	void *ip_p = skb->data + network_depth;
-
-	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
-					    ((struct ipv6hdr *)ip_p)->nexthdr;
-}
-
-#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
-
 #define MAX_PADDING 8
 
 static void
@@ -1493,7 +1483,7 @@  static inline void mlx5e_handle_csum(struct net_device *netdev,
 		goto csum_unnecessary;
 
 	if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
-		if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+		if (unlikely(get_ip_proto(skb->data, network_depth, proto) == IPPROTO_SCTP))
 			goto csum_unnecessary;
 
 		stats->csum_complete++;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 95ffd78546a7..82813efae79d 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -908,7 +908,7 @@  static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 	return cqe->tls_outer_l3_tunneled & 0x1;
 }
 
-static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe)
+static inline u8 get_cqe_tls_offload(const struct mlx5_cqe64 *cqe)
 {
 	return (cqe->tls_outer_l3_tunneled >> 3) & 0x3;
 }