diff mbox series

[bpf-next,3/4] samples/bpf: xdpsock: add period cycle time to Tx operation

Message ID 20211124091821.3916046-4-boon.leong.ong@intel.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series samples/bpf: xdpsock app enhancements | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 16 of 16 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 95 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next success VM_Test

Commit Message

Ong Boon Leong Nov. 24, 2021, 9:18 a.m. UTC
The Tx cycle time is specified in microseconds. By combining the batch size
(-b M) and the Tx cycle time (-T|--tx-cycle N), xdpsock can now transmit a
batch of M packets every N us, periodically.

For example, to transmit 1 packet per 1 ms cycle, for a total of 2000000
packets:

 $ xdpsock -i eth0 -t -N -z -T 1000 -b 1 -C 2000000

 sock0@enp0s29f1:2 txonly xdp-drv
                   pps            pkts           1.00
rx                 0              0
tx                 1000           1996872

 sock0@enp0s29f1:2 txonly xdp-drv
                   pps            pkts           1.00
rx                 0              0
tx                 1000           1997872

 sock0@enp0s29f1:2 txonly xdp-drv
                   pps            pkts           1.00
rx                 0              0
tx                 1000           1998872

 sock0@enp0s29f1:2 txonly xdp-drv
                   pps            pkts           1.00
rx                 0              0
tx                 1000           1999872

 sock0@enp0s29f1:2 txonly xdp-drv
                   pps            pkts           1.00
rx                 0              0
tx                 128            2000000

 sock0@enp0s29f1:2 txonly xdp-drv
                   pps            pkts           0.00
rx                 0              0
tx                 0              2000000

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
---
 samples/bpf/xdpsock_user.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

Comments

Song Liu Nov. 27, 2021, 6:52 a.m. UTC | #1
On Wed, Nov 24, 2021 at 1:22 AM Ong Boon Leong <boon.leong.ong@intel.com> wrote:
>
> Tx cycle time is in micro-seconds unit. By combining the batch size (-b M)
> and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of
> packets every N-us periodically.
>
> For example to transmit 1 packet each 1ms cycle time for total of 2000000
> packets:
>
>  $ xdpsock -i eth0 -T -N -z -T 1000 -b 1 -C 2000000
>
>  sock0@enp0s29f1:2 txonly xdp-drv
>                    pps            pkts           1.00
> rx                 0              0
> tx                 1000           1996872
>
>  sock0@enp0s29f1:2 txonly xdp-drv
>                    pps            pkts           1.00
> rx                 0              0
> tx                 1000           1997872
>
>  sock0@enp0s29f1:2 txonly xdp-drv
>                    pps            pkts           1.00
> rx                 0              0
> tx                 1000           1998872
>
>  sock0@enp0s29f1:2 txonly xdp-drv
>                    pps            pkts           1.00
> rx                 0              0
> tx                 1000           1999872
>
>  sock0@enp0s29f1:2 txonly xdp-drv
>                    pps            pkts           1.00
> rx                 0              0
> tx                 128            2000000
>
>  sock0@enp0s29f1:2 txonly xdp-drv
>                    pps            pkts           0.00
> rx                 0              0
> tx                 0              2000000
>
> Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>

Acked-by: Song Liu <songliubraving@fb.com>
Jesper Dangaard Brouer Nov. 27, 2021, 10:41 a.m. UTC | #2
On 24/11/2021 10.18, Ong Boon Leong wrote:
> Tx cycle time is in micro-seconds unit. By combining the batch size (-b M)
> and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of
> packets every N-us periodically.

Does this also work in --poll mode (which is a wakeup mode)?

> For example to transmit 1 packet each 1ms cycle time for total of 2000000
> packets:
> 
>   $ xdpsock -i eth0 -T -N -z -T 1000 -b 1 -C 2000000
> 
>   sock0@enp0s29f1:2 txonly xdp-drv
>                     pps            pkts           1.00
> rx                 0              0
> tx                 1000           1996872
> 
>   sock0@enp0s29f1:2 txonly xdp-drv
>                     pps            pkts           1.00
> rx                 0              0
> tx                 1000           1997872
> 
>   sock0@enp0s29f1:2 txonly xdp-drv
>                     pps            pkts           1.00
> rx                 0              0
> tx                 1000           1998872
> 
>   sock0@enp0s29f1:2 txonly xdp-drv
>                     pps            pkts           1.00
> rx                 0              0
> tx                 1000           1999872
> 
>   sock0@enp0s29f1:2 txonly xdp-drv
>                     pps            pkts           1.00
> rx                 0              0
> tx                 128            2000000
> 
>   sock0@enp0s29f1:2 txonly xdp-drv
>                     pps            pkts           0.00
> rx                 0              0
> tx                 0              2000000
> 
> Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
> ---
>   samples/bpf/xdpsock_user.c | 36 +++++++++++++++++++++++++++++++-----
>   1 file changed, 31 insertions(+), 5 deletions(-)
> 
> diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
> index 691f442bbb2..61d4063f11a 100644
> --- a/samples/bpf/xdpsock_user.c
> +++ b/samples/bpf/xdpsock_user.c
> @@ -111,6 +111,7 @@ static u32 opt_num_xsks = 1;
>   static u32 prog_id;
>   static bool opt_busy_poll;
>   static bool opt_reduced_cap;
> +static unsigned long opt_cycle_time;
>   
>   struct vlan_ethhdr {
>   	unsigned char h_dest[6];
> @@ -173,6 +174,8 @@ struct xsk_socket_info {
>   	struct xsk_app_stats app_stats;
>   	struct xsk_driver_stats drv_stats;
>   	u32 outstanding_tx;
> +	unsigned long prev_tx_time;
> +	unsigned long tx_cycle_time;
>   };
>   
>   static int num_socks;
> @@ -972,6 +975,7 @@ static struct option long_options[] = {
>   	{"tx-vlan-pri", required_argument, 0, 'K'},
>   	{"tx-dmac", required_argument, 0, 'G'},
>   	{"tx-smac", required_argument, 0, 'H'},
> +	{"tx-cycle", required_argument, 0, 'T'},
>   	{"extra-stats", no_argument, 0, 'x'},
>   	{"quiet", no_argument, 0, 'Q'},
>   	{"app-stats", no_argument, 0, 'a'},
> @@ -1017,6 +1021,7 @@ static void usage(const char *prog)
>   		"  -K, --tx-vlan-pri=n  Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
>   		"  -G, --tx-dmac=<MAC>  Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
>   		"  -H, --tx-smac=<MAC>  Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
> +		"  -T, --tx-cycle=n     Tx cycle time in micro-seconds (For -t|--txonly).\n"
>   		"  -x, --extra-stats	Display extra statistics.\n"
>   		"  -Q, --quiet          Do not display any stats.\n"
>   		"  -a, --app-stats	Display application (syscall) statistics.\n"
> @@ -1039,7 +1044,7 @@ static void parse_command_line(int argc, char **argv)
>   	opterr = 0;
>   
>   	for (;;) {
> -		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR",
> +		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR",
>   				long_options, &option_index);
>   		if (c == -1)
>   			break;
> @@ -1145,6 +1150,10 @@ static void parse_command_line(int argc, char **argv)
>   				usage(basename(argv[0]));
>   			}
>   			break;
> +		case 'T':
> +			opt_cycle_time = atoi(optarg);
> +			opt_cycle_time *= 1000;

This converts to nanoseconds, right?

> +			break;
>   		case 'x':
>   			opt_extra_stats = 1;
>   			break;
> @@ -1350,16 +1359,25 @@ static void rx_drop_all(void)
>   	}
>   }
>   
> -static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
> +static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
>   {
>   	u32 idx;
>   	unsigned int i;
>   
> +	if (xsk->tx_cycle_time) {
> +		unsigned long now = get_nsecs();
> +
> +		if ((now - xsk->prev_tx_time) < xsk->tx_cycle_time)
> +			return 0;

So, this test actively spins until the time is reached, spending
100% CPU time on this. I guess we can have this as a test for the most
accurate transmit (cyclic period) with AF_XDP.

Do you have a use-case for this?

I have a customer use-case, but my customer doesn't want to actively spin.
My plan is to use clock_nanosleep() and wake up slightly before the
target time, and then we can spin briefly for the Tx time slot.

I will need to code this up for the customer soon anyway... perhaps we 
can extend your code with this idea?

I have coded the periodic cycle Tx with UDP packets here[1], if you would
like to see some code using clock_nanosleep().  The next step (for me) is
doing this for AF_XDP (likely in my example[2]).

[1] 
https://github.com/netoptimizer/network-testing/blob/master/src/udp_pacer.c

[2] 
https://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-interaction

> +
> +		xsk->prev_tx_time = now;

Would it be valuable to know how much we overshoot the tx_cycle_time?

For my use-case, I will be monitoring the other side receiving the
packets (and using HW RX-time) to evaluate how accurate my sender is. In
this case, I would like to know whether my software "knew" that it was not
100% accurate.


> +	}
> +
>   	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
>   				      batch_size) {
>   		complete_tx_only(xsk, batch_size);
>   		if (benchmark_done)
> -			return;
> +			return 0;
>   	}

I wonder if this step can introduce jitter/delay before the actual Tx 
happens?

I mean, the real transmit cannot happen before xsk_ring_prod__submit()
is called.  If the cycles spent are exactly the same each time, it doesn't
matter that your tx_cycle_time timestamp is taken above.
Here you have a potential call to complete_tx_only(), which can
introduce variance in your period.

I would suggest moving the TX completion handling, so it doesn't
interfere with accurate TX.

>   
>   	for (i = 0; i < batch_size; i++) {
> @@ -1375,6 +1393,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
>   	*frame_nb += batch_size;
>   	*frame_nb %= NUM_FRAMES;
>   	complete_tx_only(xsk, batch_size);
> +
> +	return batch_size;
>   }
>   
>   static inline int get_batch_size(int pkt_cnt)
> @@ -1407,6 +1427,7 @@ static void complete_tx_only_all(void)
>   static void tx_only_all(void)
>   {
>   	struct pollfd fds[MAX_SOCKS] = {};
> +	unsigned long now = get_nsecs();
>   	u32 frame_nb[MAX_SOCKS] = {};
>   	int pkt_cnt = 0;
>   	int i, ret;
> @@ -1414,10 +1435,15 @@ static void tx_only_all(void)
>   	for (i = 0; i < num_socks; i++) {
>   		fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
>   		fds[0].events = POLLOUT;
> +		if (opt_cycle_time) {
> +			xsks[i]->prev_tx_time = now;
> +			xsks[i]->tx_cycle_time = opt_cycle_time;
> +		}
>   	}
>   
>   	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
>   		int batch_size = get_batch_size(pkt_cnt);
> +		int tx_cnt = 0;
>   
>   		if (opt_poll) {
>   			for (i = 0; i < num_socks; i++)
> @@ -1431,9 +1457,9 @@ static void tx_only_all(void)
>   		}
>   
>   		for (i = 0; i < num_socks; i++)
> -			tx_only(xsks[i], &frame_nb[i], batch_size);
> +			tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size);
>   
> -		pkt_cnt += batch_size;
> +		pkt_cnt += tx_cnt;
>   
>   		if (benchmark_done)
>   			break;
>
diff mbox series

Patch

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 691f442bbb2..61d4063f11a 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -111,6 +111,7 @@  static u32 opt_num_xsks = 1;
 static u32 prog_id;
 static bool opt_busy_poll;
 static bool opt_reduced_cap;
+static unsigned long opt_cycle_time;
 
 struct vlan_ethhdr {
 	unsigned char h_dest[6];
@@ -173,6 +174,8 @@  struct xsk_socket_info {
 	struct xsk_app_stats app_stats;
 	struct xsk_driver_stats drv_stats;
 	u32 outstanding_tx;
+	unsigned long prev_tx_time;
+	unsigned long tx_cycle_time;
 };
 
 static int num_socks;
@@ -972,6 +975,7 @@  static struct option long_options[] = {
 	{"tx-vlan-pri", required_argument, 0, 'K'},
 	{"tx-dmac", required_argument, 0, 'G'},
 	{"tx-smac", required_argument, 0, 'H'},
+	{"tx-cycle", required_argument, 0, 'T'},
 	{"extra-stats", no_argument, 0, 'x'},
 	{"quiet", no_argument, 0, 'Q'},
 	{"app-stats", no_argument, 0, 'a'},
@@ -1017,6 +1021,7 @@  static void usage(const char *prog)
 		"  -K, --tx-vlan-pri=n  Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
 		"  -G, --tx-dmac=<MAC>  Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
 		"  -H, --tx-smac=<MAC>  Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
+		"  -T, --tx-cycle=n     Tx cycle time in micro-seconds (For -t|--txonly).\n"
 		"  -x, --extra-stats	Display extra statistics.\n"
 		"  -Q, --quiet          Do not display any stats.\n"
 		"  -a, --app-stats	Display application (syscall) statistics.\n"
@@ -1039,7 +1044,7 @@  static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR",
+		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR",
 				long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1145,6 +1150,10 @@  static void parse_command_line(int argc, char **argv)
 				usage(basename(argv[0]));
 			}
 			break;
+		case 'T':
+			opt_cycle_time = atoi(optarg);
+			opt_cycle_time *= 1000;
+			break;
 		case 'x':
 			opt_extra_stats = 1;
 			break;
@@ -1350,16 +1359,25 @@  static void rx_drop_all(void)
 	}
 }
 
-static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
+static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
 {
 	u32 idx;
 	unsigned int i;
 
+	if (xsk->tx_cycle_time) {
+		unsigned long now = get_nsecs();
+
+		if ((now - xsk->prev_tx_time) < xsk->tx_cycle_time)
+			return 0;
+
+		xsk->prev_tx_time = now;
+	}
+
 	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
 				      batch_size) {
 		complete_tx_only(xsk, batch_size);
 		if (benchmark_done)
-			return;
+			return 0;
 	}
 
 	for (i = 0; i < batch_size; i++) {
@@ -1375,6 +1393,8 @@  static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
 	*frame_nb += batch_size;
 	*frame_nb %= NUM_FRAMES;
 	complete_tx_only(xsk, batch_size);
+
+	return batch_size;
 }
 
 static inline int get_batch_size(int pkt_cnt)
@@ -1407,6 +1427,7 @@  static void complete_tx_only_all(void)
 static void tx_only_all(void)
 {
 	struct pollfd fds[MAX_SOCKS] = {};
+	unsigned long now = get_nsecs();
 	u32 frame_nb[MAX_SOCKS] = {};
 	int pkt_cnt = 0;
 	int i, ret;
@@ -1414,10 +1435,15 @@  static void tx_only_all(void)
 	for (i = 0; i < num_socks; i++) {
 		fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
 		fds[0].events = POLLOUT;
+		if (opt_cycle_time) {
+			xsks[i]->prev_tx_time = now;
+			xsks[i]->tx_cycle_time = opt_cycle_time;
+		}
 	}
 
 	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
 		int batch_size = get_batch_size(pkt_cnt);
+		int tx_cnt = 0;
 
 		if (opt_poll) {
 			for (i = 0; i < num_socks; i++)
@@ -1431,9 +1457,9 @@  static void tx_only_all(void)
 		}
 
 		for (i = 0; i < num_socks; i++)
-			tx_only(xsks[i], &frame_nb[i], batch_size);
+			tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size);
 
-		pkt_cnt += batch_size;
+		pkt_cnt += tx_cnt;
 
 		if (benchmark_done)
 			break;