Message ID | 20211124091821.3916046-4-boon.leong.ong@intel.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | samples/bpf: xdpsock app enhancements | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/fixes_present | success | Fixes tag not required for -next series |
netdev/subject_prefix | success | Link |
netdev/cover_letter | success | Series has a cover letter |
netdev/patch_count | success | Link |
netdev/header_inline | success | No static functions without inline keyword in header files |
netdev/build_32bit | success | Errors and warnings before: 0 this patch: 0 |
netdev/cc_maintainers | success | CCed 16 of 16 maintainers |
netdev/build_clang | success | Errors and warnings before: 0 this patch: 0 |
netdev/module_param | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer |
netdev/verify_fixes | success | No Fixes tag |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 0 this patch: 0 |
netdev/checkpatch | warning | WARNING: line length of 95 exceeds 80 columns |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/source_inline | success | Was 0 now: 0 |
bpf/vmtest-bpf-next-PR | fail | PR summary |
bpf/vmtest-bpf-next | success | VM_Test |
On Wed, Nov 24, 2021 at 1:22 AM Ong Boon Leong <boon.leong.ong@intel.com> wrote: > > Tx cycle time is in micro-seconds unit. By combining the batch size (-b M) > and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of > packets every N-us periodically. > > For example to transmit 1 packet each 1ms cycle time for total of 2000000 > packets: > > $ xdpsock -i eth0 -T -N -z -T 1000 -b 1 -C 2000000 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1996872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1997872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1998872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1999872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 128 2000000 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 0.00 > rx 0 0 > tx 0 2000000 > > Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com> Acked-by: Song Liu <songliubraving@fb.com>
On 24/11/2021 10.18, Ong Boon Leong wrote: > Tx cycle time is in micro-seconds unit. By combining the batch size (-b M) > and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of > packets every N-us periodically. Does this also work for --poll mode (which is a wakeup mode) ? > For example to transmit 1 packet each 1ms cycle time for total of 2000000 > packets: > > $ xdpsock -i eth0 -T -N -z -T 1000 -b 1 -C 2000000 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1996872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1997872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1998872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 1000 1999872 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 1.00 > rx 0 0 > tx 128 2000000 > > sock0@enp0s29f1:2 txonly xdp-drv > pps pkts 0.00 > rx 0 0 > tx 0 2000000 > > Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com> > --- > samples/bpf/xdpsock_user.c | 36 +++++++++++++++++++++++++++++++----- > 1 file changed, 31 insertions(+), 5 deletions(-) > > diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c > index 691f442bbb2..61d4063f11a 100644 > --- a/samples/bpf/xdpsock_user.c > +++ b/samples/bpf/xdpsock_user.c > @@ -111,6 +111,7 @@ static u32 opt_num_xsks = 1; > static u32 prog_id; > static bool opt_busy_poll; > static bool opt_reduced_cap; > +static unsigned long opt_cycle_time; > > struct vlan_ethhdr { > unsigned char h_dest[6]; > @@ -173,6 +174,8 @@ struct xsk_socket_info { > struct xsk_app_stats app_stats; > struct xsk_driver_stats drv_stats; > u32 outstanding_tx; > + unsigned long prev_tx_time; > + unsigned long tx_cycle_time; > }; > > static int num_socks; > @@ -972,6 +975,7 @@ static struct option long_options[] = { > {"tx-vlan-pri", required_argument, 0, 'K'}, > {"tx-dmac", required_argument, 0, 'G'}, > {"tx-smac", required_argument, 0, 'H'}, > + {"tx-cycle", required_argument, 0, 'T'}, > 
{"extra-stats", no_argument, 0, 'x'}, > {"quiet", no_argument, 0, 'Q'}, > {"app-stats", no_argument, 0, 'a'}, > @@ -1017,6 +1021,7 @@ static void usage(const char *prog) > " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n" > " -G, --tx-dmac=<MAC> Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" > " -H, --tx-smac=<MAC> Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" > + " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" > " -x, --extra-stats Display extra statistics.\n" > " -Q, --quiet Do not display any stats.\n" > " -a, --app-stats Display application (syscall) statistics.\n" > @@ -1039,7 +1044,7 @@ static void parse_command_line(int argc, char **argv) > opterr = 0; > > for (;;) { > - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR", > + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR", > long_options, &option_index); > if (c == -1) > break; > @@ -1145,6 +1150,10 @@ static void parse_command_line(int argc, char **argv) > usage(basename(argv[0])); > } > break; > + case 'T': > + opt_cycle_time = atoi(optarg); > + opt_cycle_time *= 1000; Converting to nanoseconds, right? > + break; > case 'x': > opt_extra_stats = 1; > break; > @@ -1350,16 +1359,25 @@ static void rx_drop_all(void) > } > } > > -static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) > +static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) > { > u32 idx; > unsigned int i; > > + if (xsk->tx_cycle_time) { > + unsigned long now = get_nsecs(); > + > + if ((now - xsk->prev_tx_time) < xsk->tx_cycle_time) > + return 0; So, this test is actively spinning until the time is reached, spending 100% CPU time on this. I guess we can have this as a test for the most accurate transmit (cyclic period) with AF_XDP. Do you have a use-case for this? I have a customer use-case, but my customer doesn't want to actively spin. 
My plan is to use clock_nanosleep() to wake up slightly before the target time, and then spin briefly until the Tx time slot. I will need to code this up for the customer soon anyway... perhaps we can extend your code with this idea? I have coded the periodic cycle Tx with UDP packets, here[1], if you would like to see some code using clock_nanosleep(). Next step (for me) is doing this for AF_XDP (likely in my example[2]). [1] https://github.com/netoptimizer/network-testing/blob/master/src/udp_pacer.c [2] https://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-interaction > + > + xsk->prev_tx_time = now; Would it be valuable to know how much we shoot "over" the tx_cycle_time? For my use-case, I will be monitoring the other side receiving the packets (and using HW RX-time) to evaluate how accurate my sender is. In this case, I would like to know whether my software "knew" it was not 100% accurate. > + } > + > while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < > batch_size) { > complete_tx_only(xsk, batch_size); > if (benchmark_done) > - return; > + return 0; > } I wonder if this step can introduce jitter/delay before the actual Tx happens? I mean, the real transmit cannot happen before xsk_ring_prod__submit() is called. If the cycles spent are exactly the same each time, it doesn't matter that your tx_cycle_time timestamp is taken above. Here you have a potential call to complete_tx_only(), which can introduce variance in your period. I would suggest moving the TX completion handling, so it doesn't interfere with accurate TX. 
> > for (i = 0; i < batch_size; i++) { > @@ -1375,6 +1393,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) > *frame_nb += batch_size; > *frame_nb %= NUM_FRAMES; > complete_tx_only(xsk, batch_size); > + > + return batch_size; > } > > static inline int get_batch_size(int pkt_cnt) > @@ -1407,6 +1427,7 @@ static void complete_tx_only_all(void) > static void tx_only_all(void) > { > struct pollfd fds[MAX_SOCKS] = {}; > + unsigned long now = get_nsecs(); > u32 frame_nb[MAX_SOCKS] = {}; > int pkt_cnt = 0; > int i, ret; > @@ -1414,10 +1435,15 @@ static void tx_only_all(void) > for (i = 0; i < num_socks; i++) { > fds[0].fd = xsk_socket__fd(xsks[i]->xsk); > fds[0].events = POLLOUT; > + if (opt_cycle_time) { > + xsks[i]->prev_tx_time = now; > + xsks[i]->tx_cycle_time = opt_cycle_time; > + } > } > > while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { > int batch_size = get_batch_size(pkt_cnt); > + int tx_cnt = 0; > > if (opt_poll) { > for (i = 0; i < num_socks; i++) > @@ -1431,9 +1457,9 @@ static void tx_only_all(void) > } > > for (i = 0; i < num_socks; i++) > - tx_only(xsks[i], &frame_nb[i], batch_size); > + tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size); > > - pkt_cnt += batch_size; > + pkt_cnt += tx_cnt; > > if (benchmark_done) > break; >
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 691f442bbb2..61d4063f11a 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -111,6 +111,7 @@ static u32 opt_num_xsks = 1; static u32 prog_id; static bool opt_busy_poll; static bool opt_reduced_cap; +static unsigned long opt_cycle_time; struct vlan_ethhdr { unsigned char h_dest[6]; @@ -173,6 +174,8 @@ struct xsk_socket_info { struct xsk_app_stats app_stats; struct xsk_driver_stats drv_stats; u32 outstanding_tx; + unsigned long prev_tx_time; + unsigned long tx_cycle_time; }; static int num_socks; @@ -972,6 +975,7 @@ static struct option long_options[] = { {"tx-vlan-pri", required_argument, 0, 'K'}, {"tx-dmac", required_argument, 0, 'G'}, {"tx-smac", required_argument, 0, 'H'}, + {"tx-cycle", required_argument, 0, 'T'}, {"extra-stats", no_argument, 0, 'x'}, {"quiet", no_argument, 0, 'Q'}, {"app-stats", no_argument, 0, 'a'}, @@ -1017,6 +1021,7 @@ static void usage(const char *prog) " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. 
Default: %d (For -V|--tx-vlan)\n" " -G, --tx-dmac=<MAC> Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" " -H, --tx-smac=<MAC> Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" " -x, --extra-stats Display extra statistics.\n" " -Q, --quiet Do not display any stats.\n" " -a, --app-stats Display application (syscall) statistics.\n" @@ -1039,7 +1044,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR", long_options, &option_index); if (c == -1) break; @@ -1145,6 +1150,10 @@ static void parse_command_line(int argc, char **argv) usage(basename(argv[0])); } break; + case 'T': + opt_cycle_time = atoi(optarg); + opt_cycle_time *= 1000; + break; case 'x': opt_extra_stats = 1; break; @@ -1350,16 +1359,25 @@ static void rx_drop_all(void) } } -static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) +static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) { u32 idx; unsigned int i; + if (xsk->tx_cycle_time) { + unsigned long now = get_nsecs(); + + if ((now - xsk->prev_tx_time) < xsk->tx_cycle_time) + return 0; + + xsk->prev_tx_time = now; + } + while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) { complete_tx_only(xsk, batch_size); if (benchmark_done) - return; + return 0; } for (i = 0; i < batch_size; i++) { @@ -1375,6 +1393,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) *frame_nb += batch_size; *frame_nb %= NUM_FRAMES; complete_tx_only(xsk, batch_size); + + return batch_size; } static inline int get_batch_size(int pkt_cnt) @@ -1407,6 +1427,7 @@ static void complete_tx_only_all(void) static void tx_only_all(void) { struct pollfd fds[MAX_SOCKS] = {}; + unsigned 
long now = get_nsecs(); u32 frame_nb[MAX_SOCKS] = {}; int pkt_cnt = 0; int i, ret; @@ -1414,10 +1435,15 @@ static void tx_only_all(void) for (i = 0; i < num_socks; i++) { fds[0].fd = xsk_socket__fd(xsks[i]->xsk); fds[0].events = POLLOUT; + if (opt_cycle_time) { + xsks[i]->prev_tx_time = now; + xsks[i]->tx_cycle_time = opt_cycle_time; + } } while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { int batch_size = get_batch_size(pkt_cnt); + int tx_cnt = 0; if (opt_poll) { for (i = 0; i < num_socks; i++) @@ -1431,9 +1457,9 @@ static void tx_only_all(void) } for (i = 0; i < num_socks; i++) - tx_only(xsks[i], &frame_nb[i], batch_size); + tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size); - pkt_cnt += batch_size; + pkt_cnt += tx_cnt; if (benchmark_done) break;
Tx cycle time is specified in microseconds. By combining the batch size (-b M) and Tx cycle time (-T|--tx-cycle N), xdpsock can now transmit a batch of M packets periodically, once every N us. For example, to transmit 1 packet per 1 ms cycle for a total of 2000000 packets: $ xdpsock -i eth0 -t -N -z -T 1000 -b 1 -C 2000000 sock0@enp0s29f1:2 txonly xdp-drv pps pkts 1.00 rx 0 0 tx 1000 1996872 sock0@enp0s29f1:2 txonly xdp-drv pps pkts 1.00 rx 0 0 tx 1000 1997872 sock0@enp0s29f1:2 txonly xdp-drv pps pkts 1.00 rx 0 0 tx 1000 1998872 sock0@enp0s29f1:2 txonly xdp-drv pps pkts 1.00 rx 0 0 tx 1000 1999872 sock0@enp0s29f1:2 txonly xdp-drv pps pkts 1.00 rx 0 0 tx 128 2000000 sock0@enp0s29f1:2 txonly xdp-drv pps pkts 0.00 rx 0 0 tx 0 2000000 Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com> --- samples/bpf/xdpsock_user.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-)