diff mbox series

[bpf,v2,4/8] veth: Use tstats per-CPU traffic counters

Message ID 20231112203009.26073-5-daniel@iogearbox.net (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf_redirect_peer fixes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf, async
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1134 this patch: 1134
netdev/cc_maintainers warning 5 maintainers not CCed: pabeni@redhat.com john.fastabend@gmail.com ast@kernel.org edumazet@google.com hawk@kernel.org
netdev/build_clang success Errors and warnings before: 1161 this patch: 1161
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1161 this patch: 1161
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 63 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-2 success Logs for Validate matrix.py
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-llvm-16 / build / build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-llvm-16 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-llvm-16 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-llvm-16 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-16 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-16 / veristat
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Daniel Borkmann Nov. 12, 2023, 8:30 p.m. UTC
From: Peilin Ye <peilin.ye@bytedance.com>

Currently veth devices use the lstats per-CPU traffic counters, which only
cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
device from its peer's TX counters, based on the assumption that a veth
device can _only_ receive packets from its peer, which is no longer true:

For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
a different netns), skipping veth's peer device. Unfortunately, this kind
of traffic isn't currently accounted for in veth's RX stats.

In preparation for the fix, use tstats (instead of lstats) to maintain
both RX and TX counters for each veth device. We'll use RX counters for
bpf_redirect_peer() traffic, and keep using TX counters for the usual
"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
_adding_ RX count to peer's TX count, in order to cover both kinds of
traffic.

veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
for less confusion, but let's leave it to another patch to keep the fix
minimal.

Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/veth.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

Comments

Peilin Ye Nov. 12, 2023, 10:09 p.m. UTC | #1
Hi Daniel,

Thanks a lot for taking care of this!

On Sun, Nov 12, 2023 at 09:30:05PM +0100, Daniel Borkmann wrote:
> Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>

Would you like to add your Co-developed-by: here, since you've changed
the code?

> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Thanks,
Peilin Ye
Daniel Borkmann Nov. 12, 2023, 10:12 p.m. UTC | #2
Hi Peilin,

On 11/12/23 11:09 PM, Peilin Ye wrote:
> Hi Daniel,
> 
> Thanks a lot for taking care of this!
> 
> On Sun, Nov 12, 2023 at 09:30:05PM +0100, Daniel Borkmann wrote:
>> Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
> 
> Would you like to add your Co-developed-by: here, since you've changed
> the code?

Thanks, forgot about it, I'll just reply with the tag here:

Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>

Cheers,
Daniel
Nikolay Aleksandrov Nov. 13, 2023, 8:53 a.m. UTC | #3
On 11/12/23 22:30, Daniel Borkmann wrote:
> From: Peilin Ye <peilin.ye@bytedance.com>
> 
> Currently veth devices use the lstats per-CPU traffic counters, which only
> cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
> device from its peer's TX counters, based on the assumption that a veth
> device can _only_ receive packets from its peer, which is no longer true:
> 
> For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
> helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
> a different netns), skipping veth's peer device. Unfortunately, this kind
> of traffic isn't currently accounted for in veth's RX stats.
> 
> In preparation for the fix, use tstats (instead of lstats) to maintain
> both RX and TX counters for each veth device. We'll use RX counters for
> bpf_redirect_peer() traffic, and keep using TX counters for the usual
> "peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
> _adding_ RX count to peer's TX count, in order to cover both kinds of
> traffic.
> 
> veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
> for less confusion, but let's leave it to another patch to keep the fix
> minimal.
> 
> Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
> ---
>   drivers/net/veth.c | 30 +++++++++++-------------------
>   1 file changed, 11 insertions(+), 19 deletions(-)
> 

Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ac030c241d1a..6cc352296c67 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -373,7 +373,7 @@  static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_tx_timestamp(skb);
 	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
 		if (!use_napi)
-			dev_lstats_add(dev, length);
+			dev_sw_netstats_tx_add(dev, 1, length);
 		else
 			__veth_xdp_flush(rq);
 	} else {
@@ -387,14 +387,6 @@  static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	return ret;
 }
 
-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
-{
-	struct veth_priv *priv = netdev_priv(dev);
-
-	dev_lstats_read(dev, packets, bytes);
-	return atomic64_read(&priv->dropped);
-}
-
 static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
@@ -432,24 +424,24 @@  static void veth_get_stats64(struct net_device *dev,
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer;
 	struct veth_stats rx;
-	u64 packets, bytes;
 
-	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
-	tot->tx_bytes = bytes;
-	tot->tx_packets = packets;
+	tot->tx_dropped = atomic64_read(&priv->dropped);
+	dev_fetch_sw_netstats(tot, dev->tstats);
 
 	veth_stats_rx(&rx, dev);
 	tot->tx_dropped += rx.xdp_tx_err;
 	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
-	tot->rx_bytes = rx.xdp_bytes;
-	tot->rx_packets = rx.xdp_packets;
+	tot->rx_bytes += rx.xdp_bytes;
+	tot->rx_packets += rx.xdp_packets;
 
 	rcu_read_lock();
 	peer = rcu_dereference(priv->peer);
 	if (peer) {
-		veth_stats_tx(peer, &packets, &bytes);
-		tot->rx_bytes += bytes;
-		tot->rx_packets += packets;
+		struct rtnl_link_stats64 tot_peer = {};
+
+		dev_fetch_sw_netstats(&tot_peer, peer->tstats);
+		tot->rx_bytes += tot_peer.tx_bytes;
+		tot->rx_packets += tot_peer.tx_packets;
 
 		veth_stats_rx(&rx, peer);
 		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
@@ -1783,7 +1775,7 @@  static void veth_setup(struct net_device *dev)
 			       NETIF_F_HW_VLAN_STAG_RX);
 	dev->needs_free_netdev = true;
 	dev->priv_destructor = veth_dev_free;
-	dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
+	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
 	dev->max_mtu = ETH_MAX_MTU;
 
 	dev->hw_features = VETH_FEATURES;