@@ -42,6 +42,8 @@ struct xsk {
struct xsk_ring_prod tx;
struct xsk_ring_cons rx;
struct xsk_socket *socket;
+ int tx_completions;
+ struct devtx_sample last_sample;
};
static int open_xsk(int ifindex, struct xsk *xsk)
@@ -51,6 +53,7 @@ static int open_xsk(int ifindex, struct xsk *xsk)
.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.bind_flags = XDP_COPY,
+ .tx_metadata_len = TX_META_LEN,
};
const struct xsk_umem_config umem_config = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -138,6 +141,7 @@ static void ip_csum(struct iphdr *iph)
static int generate_packet(struct xsk *xsk, __u16 dst_port)
{
+ struct xdp_tx_meta *meta;
struct xdp_desc *tx_desc;
struct udphdr *udph;
struct ethhdr *eth;
@@ -151,10 +155,13 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
return -1;
tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
- tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
+ tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + TX_META_LEN;
printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+ meta = data - TX_META_LEN;
+ meta->request_timestamp = 1;
+
eth = data;
iph = (void *)(eth + 1);
udph = (void *)(iph + 1);
@@ -178,7 +185,8 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
udph->source = htons(AF_XDP_SOURCE_PORT);
udph->dest = htons(dst_port);
udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
- udph->check = 0;
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
@@ -192,7 +200,8 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
return 0;
}
-static void complete_tx(struct xsk *xsk)
+static void complete_tx(struct xsk *xsk, struct xdp_metadata *bpf_obj,
+		 struct ring_buffer *ringbuf)
 {
 __u32 idx;
 __u64 addr;
@@ -202,6 +211,14 @@ static void complete_tx(struct xsk *xsk)
 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
 xsk_ring_cons__release(&xsk->comp, 1);
+
+ ring_buffer__poll(ringbuf, 1000);
+
+ ASSERT_EQ(bpf_obj->bss->pkts_fail_tx, 0, "pkts_fail_tx");
+ ASSERT_GE(xsk->tx_completions, 1, "tx_completions");
+ ASSERT_EQ(xsk->last_sample.timestamp_retval, 0, "timestamp_retval");
+ ASSERT_GT(xsk->last_sample.hw_timestamp, 0, "hw_timestamp");
+ ASSERT_EQ(xsk->last_sample.tx_csum, 0x1c72, "tx_csum");
 }
 }
@@ -276,8 +293,23 @@ static int verify_xsk_metadata(struct xsk *xsk)
return 0;
}
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ struct xsk *xsk = ctx;
+ const struct devtx_sample *smp = data;
+
+ printf("%p: got tx timestamp sample %u %llu\n",
+ xsk, smp->timestamp_retval, smp->hw_timestamp);
+
+ xsk->last_sample = *smp;
+ xsk->tx_completions++;
+
+ return 0;
+}
+
void test_xdp_metadata(void)
{
+ struct ring_buffer *tx_compl_ringbuf = NULL;
struct xdp_metadata2 *bpf_obj2 = NULL;
struct xdp_metadata *bpf_obj = NULL;
struct bpf_program *new_prog, *prog;
@@ -290,6 +322,7 @@ void test_xdp_metadata(void)
int retries = 10;
int rx_ifindex;
int tx_ifindex;
+ int syscall_fd;
int sock_fd;
int ret;
@@ -323,6 +356,14 @@ void test_xdp_metadata(void)
if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
goto out;
+ prog = bpf_object__find_program_by_name(bpf_obj->obj, "tx_submit");
+ bpf_program__set_ifindex(prog, tx_ifindex);
+ bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+ prog = bpf_object__find_program_by_name(bpf_obj->obj, "tx_complete");
+ bpf_program__set_ifindex(prog, tx_ifindex);
+ bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
bpf_program__set_ifindex(prog, rx_ifindex);
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
@@ -330,6 +371,18 @@ void test_xdp_metadata(void)
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
goto out;
+ bpf_obj->data->ifindex = tx_ifindex;
+ bpf_obj->data->net_cookie = get_net_cookie();
+
+ ret = xdp_metadata__attach(bpf_obj);
+ if (!ASSERT_OK(ret, "xdp_metadata__attach"))
+ goto out;
+
+ tx_compl_ringbuf = ring_buffer__new(bpf_map__fd(bpf_obj->maps.tx_compl_buf),
+ process_sample, &tx_xsk, NULL);
+ if (!ASSERT_OK_PTR(tx_compl_ringbuf, "ring_buffer__new"))
+ goto out;
+
/* Make sure we can't add dev-bound programs to prog maps. */
prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
@@ -364,7 +417,8 @@ void test_xdp_metadata(void)
"verify_xsk_metadata"))
goto out;
- complete_tx(&tx_xsk);
+ /* Verify AF_XDP TX packet has completion event with a timestamp. */
+ complete_tx(&tx_xsk, bpf_obj, tx_compl_ringbuf);
/* Make sure freplace correctly picks up original bound device
* and doesn't crash.
@@ -402,5 +456,7 @@ void test_xdp_metadata(void)
xdp_metadata__destroy(bpf_obj);
if (tok)
close_netns(tok);
+ if (tx_compl_ringbuf)
+ ring_buffer__free(tx_compl_ringbuf);
SYS_NOFAIL("ip netns del xdp_metadata");
}
@@ -4,6 +4,11 @@
#include "xdp_metadata.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
struct {
__uint(type, BPF_MAP_TYPE_XSKMAP);
@@ -19,10 +24,24 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} tx_compl_buf SEC(".maps");
+
+__u64 pkts_fail_tx = 0;
+
+int ifindex = -1;
+__u64 net_cookie = -1;
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_devtx_request_tx_timestamp(const struct devtx_ctx *ctx) __ksym;
+extern int bpf_devtx_tx_timestamp(const struct devtx_ctx *ctx, __u64 *timestamp) __ksym;
+extern int bpf_devtx_request_l4_csum(const struct devtx_ctx *ctx,
+ u16 csum_start, u16 csum_offset) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
@@ -61,4 +80,126 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+static inline int verify_frame(const struct sk_buff *skb, const struct skb_shared_info *sinfo)
+{
+ struct ethhdr eth = {};
+
+ /* all the pointers are set up correctly */
+ if (!skb->data)
+ return -1;
+ if (!sinfo)
+ return -1;
+
+ /* expect a linear skb: no frags, frag[0] left untouched */
+ if (sinfo->nr_frags != 0)
+ return -1;
+ if (sinfo->frags[0].bv_page != 0)
+ return -1;
+ if (sinfo->frags[0].bv_len != 0)
+ return -1;
+ if (sinfo->frags[0].bv_offset != 0)
+ return -1;
+
+ /* the data has something that looks like ethernet */
+ if (skb->len != 46)
+ return -1;
+ bpf_probe_read_kernel(&eth, sizeof(eth), skb->data);
+
+ if (eth.h_proto != bpf_htons(ETH_P_IP))
+ return -1;
+
+ return 0;
+}
+
+static inline bool my_netdev(const struct devtx_ctx *ctx)
+{
+ /* Lazily latch the device pointer once matched via ifindex + cookie. */
+ static struct net_device *netdev;
+
+ if (netdev)
+ return netdev == ctx->netdev;
+
+ if (ctx->netdev->ifindex != ifindex ||
+ ctx->netdev->nd_net.net->net_cookie != net_cookie)
+ return false;
+
+ netdev = ctx->netdev;
+ return true;
+}
+
+SEC("fentry/veth_devtx_submit_skb")
+int BPF_PROG(tx_submit, const struct devtx_ctx *devtx, struct sk_buff *skb)
+{
+ int udpoff = sizeof(struct ethhdr) + sizeof(struct iphdr); /* offset of the UDP header in the frame */
+ struct xdp_tx_meta meta = {};
+ int ret;
+
+ if (!my_netdev(devtx)) /* only trace the test's TX device */
+ return 0;
+ if (devtx->sinfo->meta_len != TX_META_LEN)
+ return 0;
+
+ bpf_probe_read_kernel(&meta, sizeof(meta), skb->data - TX_META_LEN); /* AF_XDP TX metadata sits in front of the frame */
+ if (!meta.request_timestamp)
+ return 0;
+
+ ret = verify_frame(skb, devtx->sinfo);
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1); /* userspace asserts this stays 0 */
+ return 0;
+ }
+
+ ret = bpf_devtx_request_tx_timestamp(devtx);
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1);
+ return 0;
+ }
+
+ ret = bpf_devtx_request_l4_csum(devtx, udpoff, udpoff + offsetof(struct udphdr, check)); /* offload UDP csum completion */
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1);
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("fentry/veth_devtx_complete_skb")
+int BPF_PROG(tx_complete, const struct devtx_ctx *devtx, struct sk_buff *skb)
+{
+ struct xdp_tx_meta meta = {};
+ struct devtx_sample *sample;
+ struct udphdr udph;
+ int ret;
+
+ if (!my_netdev(devtx)) /* only trace the test's TX device */
+ return 0;
+ if (devtx->sinfo->meta_len != TX_META_LEN)
+ return 0;
+
+ bpf_probe_read_kernel(&meta, sizeof(meta), skb->data - TX_META_LEN); /* AF_XDP TX metadata sits in front of the frame */
+ if (!meta.request_timestamp)
+ return 0;
+
+ ret = verify_frame(skb, devtx->sinfo);
+ if (ret < 0) {
+ __sync_add_and_fetch(&pkts_fail_tx, 1); /* userspace asserts this stays 0 */
+ return 0;
+ }
+
+ sample = bpf_ringbuf_reserve(&tx_compl_buf, sizeof(*sample), 0);
+ if (!sample)
+ return 0; /* ringbuf full: drop; userspace would miss the completion */
+
+ bpf_probe_read_kernel(&udph, sizeof(udph),
+ skb->data + sizeof(struct ethhdr) + sizeof(struct iphdr)); /* NOTE(review): return unchecked — udph may be stale on failure */
+
+ sample->timestamp_retval = bpf_devtx_tx_timestamp(devtx, &sample->hw_timestamp);
+ sample->tx_csum = udph.check; /* userspace asserts the offloaded value (0x1c72) */
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
@@ -18,3 +18,19 @@ struct xdp_meta {
__s32 rx_hash_err;
};
};
+
+struct devtx_sample {
+ int timestamp_retval; /* return value of bpf_devtx_tx_timestamp() */
+ __u64 hw_timestamp; /* device TX completion timestamp */
+ __u64 sw_complete_timestamp; /* NOTE(review): never written by tx_complete — confirm intent */
+ __u16 tx_csum; /* UDP checksum observed at completion */
+};
+
+#define TX_META_LEN 8
+
+struct xdp_tx_meta {
+ __u8 request_timestamp; /* nonzero: ask BPF prog to request a HW TX timestamp */
+ __u8 padding0;
+ __u16 padding1;
+ __u32 padding2; /* pads the struct to TX_META_LEN (8) bytes */
+};
Attach kfuncs that request and report TX timestamp via ringbuf. Confirm on the userspace side that the program has triggered and the timestamp is non-zero. Also make sure the skb has sensible pointers and data. In addition, calculate the pseudo-header checksum and offload the payload checksum calculation to the kfunc. Signed-off-by: Stanislav Fomichev <sdf@google.com> --- .../selftests/bpf/prog_tests/xdp_metadata.c | 64 +++++++- .../selftests/bpf/progs/xdp_metadata.c | 141 ++++++++++++++++++ tools/testing/selftests/bpf/xdp_metadata.h | 16 ++ 3 files changed, 217 insertions(+), 4 deletions(-)