diff mbox series

[bpf-next,2/4] selftests/bpf: Drop python client/server in favor of threads

Message ID 160384963313.698509.13129692731727238158.stgit@localhost.localdomain (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series selftests/bpf: Migrate test_tcpbpf_user to be a part of test_progs framework | expand

Commit Message

Alexander Duyck Oct. 28, 2020, 1:47 a.m. UTC
From: Alexander Duyck <alexanderduyck@fb.com>

Drop the tcp_client/server.py files in favor of using a client and server
thread within the test case. Specifically we spawn a new thread to play the
role of the server, and the main testing thread plays the role of client.

Doing this we are able to reduce overhead since we don't have two python
workers possibly floating around. In addition we don't have to worry about
synchronization issues and as such the retry loop waiting for the threads
to close the sockets can be dropped as we will have already closed the
sockets in the local executable and synchronized the server thread.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c |  125 +++++++++++++++++---
 tools/testing/selftests/bpf/tcp_client.py          |   50 --------
 tools/testing/selftests/bpf/tcp_server.py          |   80 -------------
 3 files changed, 107 insertions(+), 148 deletions(-)
 delete mode 100755 tools/testing/selftests/bpf/tcp_client.py
 delete mode 100755 tools/testing/selftests/bpf/tcp_server.py

Comments

Martin KaFai Lau Oct. 29, 2020, 1:51 a.m. UTC | #1
On Tue, Oct 27, 2020 at 06:47:13PM -0700, Alexander Duyck wrote:
> From: Alexander Duyck <alexanderduyck@fb.com>
> 
> Drop the tcp_client/server.py files in favor of using a client and server
> thread within the test case. Specifically we spawn a new thread to play the
> role of the server, and the main testing thread plays the role of client.
> 
> Doing this we are able to reduce overhead since we don't have two python
> workers possibly floating around. In addition we don't have to worry about
> synchronization issues and as such the retry loop waiting for the threads
> to close the sockets can be dropped as we will have already closed the
> sockets in the local executable and synchronized the server thread.
Thanks for working on this.

> 
> Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
> ---
>  .../testing/selftests/bpf/prog_tests/tcpbpf_user.c |  125 +++++++++++++++++---
>  tools/testing/selftests/bpf/tcp_client.py          |   50 --------
>  tools/testing/selftests/bpf/tcp_server.py          |   80 -------------
>  3 files changed, 107 insertions(+), 148 deletions(-)
>  delete mode 100755 tools/testing/selftests/bpf/tcp_client.py
>  delete mode 100755 tools/testing/selftests/bpf/tcp_server.py
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> index 5becab8b04e3..71ab82e37eb7 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> @@ -1,14 +1,65 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <inttypes.h>
>  #include <test_progs.h>
> +#include <network_helpers.h>
>  
>  #include "test_tcpbpf.h"
>  #include "cgroup_helpers.h"
>  
> +#define LO_ADDR6 "::1"
>  #define CG_NAME "/tcpbpf-user-test"
>  
> -/* 3 comes from one listening socket + both ends of the connection */
> -#define EXPECTED_CLOSE_EVENTS		3
> +static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
> +static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
> +
> +static void *server_thread(void *arg)
> +{
> +	struct sockaddr_storage addr;
> +	socklen_t len = sizeof(addr);
> +	int fd = *(int *)arg;
> +	char buf[1000];
> +	int client_fd;
> +	int err = 0;
> +	int i;
> +
> +	err = listen(fd, 1);
This is not needed.  start_server() has done it.

> +
> +	pthread_mutex_lock(&server_started_mtx);
> +	pthread_cond_signal(&server_started);
> +	pthread_mutex_unlock(&server_started_mtx);
> +
> +	if (err < 0) {
> +		perror("Failed to listen on socket");
> +		err = errno;
> +		goto err;
> +	}
> +
> +	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
> +	if (client_fd < 0) {
> +		perror("Failed to accept client");
> +		err = errno;
> +		goto err;
> +	}
> +
> +	if (recv(client_fd, buf, 1000, 0) < 1000) {
> +		perror("failed/partial recv");
> +		err = errno;
> +		goto out_clean;
> +	}
> +
> +	for (i = 0; i < 500; i++)
> +		buf[i] = '.';
> +
> +	if (send(client_fd, buf, 500, 0) < 500) {
> +		perror("failed/partial send");
> +		err = errno;
> +		goto out_clean;
> +	}
> +out_clean:
> +	close(client_fd);
> +err:
> +	return (void *)(long)err;
> +}
>  
>  #define EXPECT_EQ(expected, actual, fmt)			\
>  	do {							\
> @@ -43,7 +94,9 @@ int verify_result(const struct tcpbpf_globals *result)
>  	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
>  	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
>  	EXPECT_EQ(1, result->num_listen, PRIu32);
> -	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
> +
> +	/* 3 comes from one listening socket + both ends of the connection */
> +	EXPECT_EQ(3, result->num_close_events, PRIu32);
>  
>  	return ret;
>  }
> @@ -67,6 +120,52 @@ int verify_sockopt_result(int sock_map_fd)
>  	return ret;
>  }
>  
> +static int run_test(void)
> +{
> +	int server_fd, client_fd;
> +	void *server_err;
> +	char buf[1000];
> +	pthread_t tid;
> +	int err = -1;
> +	int i;
> +
> +	server_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
> +	if (CHECK_FAIL(server_fd < 0))
> +		return err;
> +
> +	pthread_mutex_lock(&server_started_mtx);
> +	if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
> +				      (void *)&server_fd)))
> +		goto close_server_fd;
> +
> +	pthread_cond_wait(&server_started, &server_started_mtx);
> +	pthread_mutex_unlock(&server_started_mtx);
> +
> +	client_fd = connect_to_fd(server_fd, 0);
> +	if (client_fd < 0)
> +		goto close_server_fd;
> +
> +	for (i = 0; i < 1000; i++)
> +		buf[i] = '+';
> +
> +	if (CHECK_FAIL(send(client_fd, buf, 1000, 0) < 1000))
> +		goto close_client_fd;
> +
> +	if (CHECK_FAIL(recv(client_fd, buf, 500, 0) < 500))
> +		goto close_client_fd;
> +
> +	pthread_join(tid, &server_err);
I think this can be further simplified without starting thread
and do everything in run_test() instead.

Something like this (uncompiled code):

	accept_fd = accept(server_fd, NULL, 0);
	send(client_fd, plus_buf, 1000, 0);
	recv(accept_fd, recv_buf, 1000, 0);
	send(accept_fd, dot_buf, 500, 0);
	recv(client_fd, recv_buf, 500, 0);

> +
> +	err = (int)(long)server_err;
> +	CHECK_FAIL(err);
> +
> +close_client_fd:
> +	close(client_fd);
> +close_server_fd:
> +	close(server_fd);
> +	return err;
> +}
> +
>  void test_tcpbpf_user(void)
>  {
>  	const char *file = "test_tcpbpf_kern.o";
> @@ -74,7 +173,6 @@ void test_tcpbpf_user(void)
>  	struct tcpbpf_globals g = {0};
>  	struct bpf_object *obj;
>  	int cg_fd = -1;
> -	int retry = 10;
>  	__u32 key = 0;
>  	int rv;
>  
> @@ -94,11 +192,6 @@ void test_tcpbpf_user(void)
>  		goto err;
>  	}
>  
> -	if (CHECK_FAIL(system("./tcp_server.py"))) {
> -		fprintf(stderr, "FAILED: TCP server\n");
> -		goto err;
> -	}
> -
>  	map_fd = bpf_find_map(__func__, obj, "global_map");
>  	if (CHECK_FAIL(map_fd < 0))
>  		goto err;
> @@ -107,21 +200,17 @@ void test_tcpbpf_user(void)
>  	if (CHECK_FAIL(sock_map_fd < 0))
>  		goto err;
>  
> -retry_lookup:
> +	if (run_test()) {
> +		fprintf(stderr, "FAILED: TCP server\n");
> +		goto err;
> +	}
> +
>  	rv = bpf_map_lookup_elem(map_fd, &key, &g);
>  	if (CHECK_FAIL(rv != 0)) {
CHECK() is a better one here if it needs to output error message.
The same goes for similar usages in this patch set.

For the start_server() above which has already logged the error message,
CHECK_FAIL() is good enough.

>  		fprintf(stderr, "FAILED: bpf_map_lookup_elem returns %d\n", rv);
>  		goto err;
>  	}
>  
> -	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
It is good to have a solution to avoid a test depending on some number
of retries.

After looking at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
it is not clear to me removing python alone is enough to avoid the
race (so the retry--).  One of the sk might still be in TCP_LAST_ACK
instead of TCP_CLOSE.

Also, when looking closer at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
it seems the map value "gp" is slapped together across multiple
TCP_CLOSE events which may be not easy to understand.

How about it checks different states: TCP_CLOSE, TCP_LAST_ACK,
and BPF_TCP_FIN_WAIT2.  Each of this state will update its own
values under "gp".  Something like this (only compiler tested on
top of patch 4):

diff --git i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 7e92c37976ac..65b247b03dfc 100644
--- i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -90,15 +90,14 @@ static void verify_result(int map_fd, int sock_map_fd)
 	      result.event_map, expected_events);
 
 	ASSERT_EQ(result.bytes_received, 501, "bytes_received");
-	ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
+	ASSERT_EQ(result.bytes_acked, 1001, "bytes_acked");
 	ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
 	ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
 	ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
 	ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
-	ASSERT_EQ(result.num_listen, 1, "num_listen");
-
-	/* 3 comes from one listening socket + both ends of the connection */
-	ASSERT_EQ(result.num_close_events, 3, "num_close_events");
+	ASSERT_EQ(result.num_listen_close, 1, "num_listen");
+	ASSERT_EQ(result.num_last_ack, 1, "num_last_ack");
+	ASSERT_EQ(result.num_fin_wait2, 1, "num_fin_wait2");
 
 	/* check setsockopt for SAVE_SYN */
 	key = 0;
diff --git i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 3e6912e4df3d..2c5ffb50d6e0 100644
--- i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -55,9 +55,11 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 {
 	char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
 	struct bpf_sock_ops *reuse = skops;
+	struct tcpbpf_globals *gp;
 	struct tcphdr *thdr;
 	int good_call_rv = 0;
 	int bad_call_rv = 0;
+	__u32 key_zero = 0;
 	int save_syn = 1;
 	int rv = -1;
 	int v = 0;
@@ -155,26 +157,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 	case BPF_SOCK_OPS_RETRANS_CB:
 		break;
 	case BPF_SOCK_OPS_STATE_CB:
-		if (skops->args[1] == BPF_TCP_CLOSE) {
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
-			if (skops->args[0] == BPF_TCP_LISTEN) {
-				g.num_listen++;
-			} else {
-				g.total_retrans = skops->total_retrans;
-				g.data_segs_in = skops->data_segs_in;
-				g.data_segs_out = skops->data_segs_out;
-				g.bytes_received = skops->bytes_received;
-				g.bytes_acked = skops->bytes_acked;
-			}
-			g.num_close_events++;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
+		gp = bpf_map_lookup_elem(&global_map, &key_zero);
+		if (!gp)
+			break;
+		if (skops->args[1] == BPF_TCP_CLOSE &&
+		    skops->args[0] == BPF_TCP_LISTEN) {
+			gp->num_listen_close++;
+		} else if (skops->args[1] == BPF_TCP_LAST_ACK) {
+			gp->total_retrans = skops->total_retrans;
+			gp->data_segs_in = skops->data_segs_in;
+			gp->data_segs_out = skops->data_segs_out;
+			gp->bytes_received = skops->bytes_received;
+			gp->bytes_acked = skops->bytes_acked;
+			gp->num_last_ack++;
+		} else if (skops->args[1] == BPF_TCP_FIN_WAIT2) {
+			gp->num_fin_wait2++;
 		}
 		break;
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
diff --git i/tools/testing/selftests/bpf/test_tcpbpf.h w/tools/testing/selftests/bpf/test_tcpbpf.h
index 6220b95cbd02..0dec324ba4a6 100644
--- i/tools/testing/selftests/bpf/test_tcpbpf.h
+++ w/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -12,7 +12,8 @@ struct tcpbpf_globals {
 	__u32 good_cb_test_rv;
 	__u64 bytes_received;
 	__u64 bytes_acked;
-	__u32 num_listen;
-	__u32 num_close_events;
+	__u32 num_listen_close;
+	__u32 num_last_ack;
+	__u32 num_fin_wait2;
 };
 #endif

I also noticed the bytes_received/acked depends on the order of close(),
i.e. always close the accepted fd first.  I think a comment
in the tcpbpf_user.c is good enough for now.

[ It does not have to be in this set and it can be done in another
  follow up effort.
  Instead of using a bpf map to store the result, using global
  variables in test_tcpbpf_kern.c will simplify the code further. ]
Alexander Duyck Oct. 29, 2020, 4:58 p.m. UTC | #2
On Wed, Oct 28, 2020 at 6:51 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Tue, Oct 27, 2020 at 06:47:13PM -0700, Alexander Duyck wrote:
> > From: Alexander Duyck <alexanderduyck@fb.com>
> >
> > Drop the tcp_client/server.py files in favor of using a client and server
> > thread within the test case. Specifically we spawn a new thread to play the
> > role of the server, and the main testing thread plays the role of client.
> >
> > Doing this we are able to reduce overhead since we don't have two python
> > workers possibly floating around. In addition we don't have to worry about
> > synchronization issues and as such the retry loop waiting for the threads
> > to close the sockets can be dropped as we will have already closed the
> > sockets in the local executable and synchronized the server thread.
> Thanks for working on this.
>
> >
> > Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
> > ---
> >  .../testing/selftests/bpf/prog_tests/tcpbpf_user.c |  125 +++++++++++++++++---
> >  tools/testing/selftests/bpf/tcp_client.py          |   50 --------
> >  tools/testing/selftests/bpf/tcp_server.py          |   80 -------------
> >  3 files changed, 107 insertions(+), 148 deletions(-)
> >  delete mode 100755 tools/testing/selftests/bpf/tcp_client.py
> >  delete mode 100755 tools/testing/selftests/bpf/tcp_server.py
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > index 5becab8b04e3..71ab82e37eb7 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > @@ -1,14 +1,65 @@
> >  // SPDX-License-Identifier: GPL-2.0
> >  #include <inttypes.h>
> >  #include <test_progs.h>
> > +#include <network_helpers.h>
> >
> >  #include "test_tcpbpf.h"
> >  #include "cgroup_helpers.h"
> >
> > +#define LO_ADDR6 "::1"
> >  #define CG_NAME "/tcpbpf-user-test"
> >
> > -/* 3 comes from one listening socket + both ends of the connection */
> > -#define EXPECTED_CLOSE_EVENTS                3
> > +static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
> > +static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
> > +
> > +static void *server_thread(void *arg)
> > +{
> > +     struct sockaddr_storage addr;
> > +     socklen_t len = sizeof(addr);
> > +     int fd = *(int *)arg;
> > +     char buf[1000];
> > +     int client_fd;
> > +     int err = 0;
> > +     int i;
> > +
> > +     err = listen(fd, 1);
> This is not needed.  start_server() has done it.

Okay, I will drop it.

> > +
> > +     pthread_mutex_lock(&server_started_mtx);
> > +     pthread_cond_signal(&server_started);
> > +     pthread_mutex_unlock(&server_started_mtx);
> > +
> > +     if (err < 0) {
> > +             perror("Failed to listen on socket");
> > +             err = errno;
> > +             goto err;
> > +     }
> > +
> > +     client_fd = accept(fd, (struct sockaddr *)&addr, &len);
> > +     if (client_fd < 0) {
> > +             perror("Failed to accept client");
> > +             err = errno;
> > +             goto err;
> > +     }
> > +
> > +     if (recv(client_fd, buf, 1000, 0) < 1000) {
> > +             perror("failed/partial recv");
> > +             err = errno;
> > +             goto out_clean;
> > +     }
> > +
> > +     for (i = 0; i < 500; i++)
> > +             buf[i] = '.';
> > +
> > +     if (send(client_fd, buf, 500, 0) < 500) {
> > +             perror("failed/partial send");
> > +             err = errno;
> > +             goto out_clean;
> > +     }
> > +out_clean:
> > +     close(client_fd);
> > +err:
> > +     return (void *)(long)err;
> > +}
> >
> >  #define EXPECT_EQ(expected, actual, fmt)                     \
> >       do {                                                    \
> > @@ -43,7 +94,9 @@ int verify_result(const struct tcpbpf_globals *result)
> >       EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
> >       EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
> >       EXPECT_EQ(1, result->num_listen, PRIu32);
> > -     EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
> > +
> > +     /* 3 comes from one listening socket + both ends of the connection */
> > +     EXPECT_EQ(3, result->num_close_events, PRIu32);
> >
> >       return ret;
> >  }
> > @@ -67,6 +120,52 @@ int verify_sockopt_result(int sock_map_fd)
> >       return ret;
> >  }
> >
> > +static int run_test(void)
> > +{
> > +     int server_fd, client_fd;
> > +     void *server_err;
> > +     char buf[1000];
> > +     pthread_t tid;
> > +     int err = -1;
> > +     int i;
> > +
> > +     server_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
> > +     if (CHECK_FAIL(server_fd < 0))
> > +             return err;
> > +
> > +     pthread_mutex_lock(&server_started_mtx);
> > +     if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
> > +                                   (void *)&server_fd)))
> > +             goto close_server_fd;
> > +
> > +     pthread_cond_wait(&server_started, &server_started_mtx);
> > +     pthread_mutex_unlock(&server_started_mtx);
> > +
> > +     client_fd = connect_to_fd(server_fd, 0);
> > +     if (client_fd < 0)
> > +             goto close_server_fd;
> > +
> > +     for (i = 0; i < 1000; i++)
> > +             buf[i] = '+';
> > +
> > +     if (CHECK_FAIL(send(client_fd, buf, 1000, 0) < 1000))
> > +             goto close_client_fd;
> > +
> > +     if (CHECK_FAIL(recv(client_fd, buf, 500, 0) < 500))
> > +             goto close_client_fd;
> > +
> > +     pthread_join(tid, &server_err);
> I think this can be further simplified without starting thread
> and do everything in run_test() instead.
>
> Something like this (uncompiled code):
>
>         accept_fd = accept(server_fd, NULL, 0);
>         send(client_fd, plus_buf, 1000, 0);
>         recv(accept_fd, recv_buf, 1000, 0);
>         send(accept_fd, dot_buf, 500, 0);
>         recv(client_fd, recv_buf, 500, 0);

I can take a look at switching it over.

> > +
> > +     err = (int)(long)server_err;
> > +     CHECK_FAIL(err);
> > +
> > +close_client_fd:
> > +     close(client_fd);
> > +close_server_fd:
> > +     close(server_fd);
> > +     return err;
> > +}
> > +
> >  void test_tcpbpf_user(void)
> >  {
> >       const char *file = "test_tcpbpf_kern.o";
> > @@ -74,7 +173,6 @@ void test_tcpbpf_user(void)
> >       struct tcpbpf_globals g = {0};
> >       struct bpf_object *obj;
> >       int cg_fd = -1;
> > -     int retry = 10;
> >       __u32 key = 0;
> >       int rv;
> >
> > @@ -94,11 +192,6 @@ void test_tcpbpf_user(void)
> >               goto err;
> >       }
> >
> > -     if (CHECK_FAIL(system("./tcp_server.py"))) {
> > -             fprintf(stderr, "FAILED: TCP server\n");
> > -             goto err;
> > -     }
> > -
> >       map_fd = bpf_find_map(__func__, obj, "global_map");
> >       if (CHECK_FAIL(map_fd < 0))
> >               goto err;
> > @@ -107,21 +200,17 @@ void test_tcpbpf_user(void)
> >       if (CHECK_FAIL(sock_map_fd < 0))
> >               goto err;
> >
> > -retry_lookup:
> > +     if (run_test()) {
> > +             fprintf(stderr, "FAILED: TCP server\n");
> > +             goto err;
> > +     }
> > +
> >       rv = bpf_map_lookup_elem(map_fd, &key, &g);
> >       if (CHECK_FAIL(rv != 0)) {
> CHECK() is a better one here if it needs to output error message.
> The same goes for similar usages in this patch set.
>
> For the start_server() above which has already logged the error message,
> CHECK_FAIL() is good enough.
>
> >               fprintf(stderr, "FAILED: bpf_map_lookup_elem returns %d\n", rv);
> >               goto err;
> >       }
> >
> > -     if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
> It is good to have a solution to avoid a test depending on some number
> of retries.
>
> After looking at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
> it is not clear to me removing python alone is enough to avoid the
> race (so the retry--).  One of the sk might still be in TCP_LAST_ACK
> instead of TCP_CLOSE.
>

After you pointed this out I decided to go back through and do some
further testing. After testing this for several thousand iterations it
does look like the issue can still happen, it was just significantly
less frequent with the threaded approach, but it was still there. So I
will go back through and add this back and then fold it into the
verify_results function in the third patch. Although I might reduce
the wait times as it seems like with the inline approach we only need
in the 10s of microseconds instead of 100s for the sockets to close
out.

> Also, when looking closer at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
> it seems the map value "gp" is slapped together across multiple
> TCP_CLOSE events which may be not easy to understand.
>
> How about it checks different states: TCP_CLOSE, TCP_LAST_ACK,
> and BPF_TCP_FIN_WAIT2.  Each of this state will update its own
> values under "gp".  Something like this (only compiler tested on
> top of patch 4):
>
> diff --git i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> index 7e92c37976ac..65b247b03dfc 100644
> --- i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> +++ w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> @@ -90,15 +90,14 @@ static void verify_result(int map_fd, int sock_map_fd)
>               result.event_map, expected_events);
>
>         ASSERT_EQ(result.bytes_received, 501, "bytes_received");
> -       ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
> +       ASSERT_EQ(result.bytes_acked, 1001, "bytes_acked");
>         ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
>         ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
>         ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
>         ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
> -       ASSERT_EQ(result.num_listen, 1, "num_listen");
> -
> -       /* 3 comes from one listening socket + both ends of the connection */
> -       ASSERT_EQ(result.num_close_events, 3, "num_close_events");
> +       ASSERT_EQ(result.num_listen_close, 1, "num_listen");
> +       ASSERT_EQ(result.num_last_ack, 1, "num_last_ack");
> +       ASSERT_EQ(result.num_fin_wait2, 1, "num_fin_wait2");
>
>         /* check setsockopt for SAVE_SYN */
>         key = 0;
> diff --git i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> index 3e6912e4df3d..2c5ffb50d6e0 100644
> --- i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> +++ w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> @@ -55,9 +55,11 @@ int bpf_testcb(struct bpf_sock_ops *skops)
>  {
>         char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
>         struct bpf_sock_ops *reuse = skops;
> +       struct tcpbpf_globals *gp;
>         struct tcphdr *thdr;
>         int good_call_rv = 0;
>         int bad_call_rv = 0;
> +       __u32 key_zero = 0;
>         int save_syn = 1;
>         int rv = -1;
>         int v = 0;
> @@ -155,26 +157,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
>         case BPF_SOCK_OPS_RETRANS_CB:
>                 break;
>         case BPF_SOCK_OPS_STATE_CB:
> -               if (skops->args[1] == BPF_TCP_CLOSE) {
> -                       __u32 key = 0;
> -                       struct tcpbpf_globals g, *gp;
> -
> -                       gp = bpf_map_lookup_elem(&global_map, &key);
> -                       if (!gp)
> -                               break;
> -                       g = *gp;
> -                       if (skops->args[0] == BPF_TCP_LISTEN) {
> -                               g.num_listen++;
> -                       } else {
> -                               g.total_retrans = skops->total_retrans;
> -                               g.data_segs_in = skops->data_segs_in;
> -                               g.data_segs_out = skops->data_segs_out;
> -                               g.bytes_received = skops->bytes_received;
> -                               g.bytes_acked = skops->bytes_acked;
> -                       }
> -                       g.num_close_events++;
> -                       bpf_map_update_elem(&global_map, &key, &g,
> -                                           BPF_ANY);
> +               gp = bpf_map_lookup_elem(&global_map, &key_zero);
> +               if (!gp)
> +                       break;
> +               if (skops->args[1] == BPF_TCP_CLOSE &&
> +                   skops->args[0] == BPF_TCP_LISTEN) {
> +                       gp->num_listen_close++;
> +               } else if (skops->args[1] == BPF_TCP_LAST_ACK) {
> +                       gp->total_retrans = skops->total_retrans;
> +                       gp->data_segs_in = skops->data_segs_in;
> +                       gp->data_segs_out = skops->data_segs_out;
> +                       gp->bytes_received = skops->bytes_received;
> +                       gp->bytes_acked = skops->bytes_acked;
> +                       gp->num_last_ack++;
> +               } else if (skops->args[1] == BPF_TCP_FIN_WAIT2) {
> +                       gp->num_fin_wait2++;
>                 }
>                 break;
>         case BPF_SOCK_OPS_TCP_LISTEN_CB:
> diff --git i/tools/testing/selftests/bpf/test_tcpbpf.h w/tools/testing/selftests/bpf/test_tcpbpf.h
> index 6220b95cbd02..0dec324ba4a6 100644
> --- i/tools/testing/selftests/bpf/test_tcpbpf.h
> +++ w/tools/testing/selftests/bpf/test_tcpbpf.h
> @@ -12,7 +12,8 @@ struct tcpbpf_globals {
>         __u32 good_cb_test_rv;
>         __u64 bytes_received;
>         __u64 bytes_acked;
> -       __u32 num_listen;
> -       __u32 num_close_events;
> +       __u32 num_listen_close;
> +       __u32 num_last_ack;
> +       __u32 num_fin_wait2;
>  };
>  #endif

I can look at pulling this in and including it as a patch 5 if you
would prefer. If I find any issues I will let you know.

> I also noticed the bytes_received/acked depends on the order of close(),
> i.e. always close the accepted fd first.  I think a comment
> in the tcpbpf_user.c is good enough for now.

Okay, I can add a comment explaining this.

> [ It does not have to be in this set and it can be done in another
>   follow up effort.
>   Instead of using a bpf map to store the result, using global
>   variables in test_tcpbpf_kern.c will simplify the code further. ]

I assume this comment is about the changes to test_tcpbpf_kern.c? Just
want to clarify as I assume this isn't about adding the comment about
the socket closing order affecting the bytes_received/acked.
Martin KaFai Lau Oct. 29, 2020, 6:12 p.m. UTC | #3
On Thu, Oct 29, 2020 at 09:58:15AM -0700, Alexander Duyck wrote:
[ ... ]

> > > @@ -43,7 +94,9 @@ int verify_result(const struct tcpbpf_globals *result)
> > >       EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
> > >       EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
> > >       EXPECT_EQ(1, result->num_listen, PRIu32);
> > > -     EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
> > > +
> > > +     /* 3 comes from one listening socket + both ends of the connection */
> > > +     EXPECT_EQ(3, result->num_close_events, PRIu32);
> > >
> > >       return ret;
> > >  }
> > > @@ -67,6 +120,52 @@ int verify_sockopt_result(int sock_map_fd)
> > >       return ret;
> > >  }
> > >
> > > +static int run_test(void)
> > > +{
> > > +     int server_fd, client_fd;
> > > +     void *server_err;
> > > +     char buf[1000];
> > > +     pthread_t tid;
> > > +     int err = -1;
> > > +     int i;
> > > +
> > > +     server_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
> > > +     if (CHECK_FAIL(server_fd < 0))
> > > +             return err;
> > > +
> > > +     pthread_mutex_lock(&server_started_mtx);
> > > +     if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
> > > +                                   (void *)&server_fd)))
> > > +             goto close_server_fd;
> > > +
> > > +     pthread_cond_wait(&server_started, &server_started_mtx);
> > > +     pthread_mutex_unlock(&server_started_mtx);
> > > +
> > > +     client_fd = connect_to_fd(server_fd, 0);
> > > +     if (client_fd < 0)
> > > +             goto close_server_fd;
> > > +
> > > +     for (i = 0; i < 1000; i++)
> > > +             buf[i] = '+';
> > > +
> > > +     if (CHECK_FAIL(send(client_fd, buf, 1000, 0) < 1000))
> > > +             goto close_client_fd;
> > > +
> > > +     if (CHECK_FAIL(recv(client_fd, buf, 500, 0) < 500))
> > > +             goto close_client_fd;
> > > +
> > > +     pthread_join(tid, &server_err);
> > I think this can be further simplified without starting thread
> > and do everything in run_test() instead.
> >
> > Something like this (uncompiled code):
> >
> >         accept_fd = accept(server_fd, NULL, 0);
> >         send(client_fd, plus_buf, 1000, 0);
> >         recv(accept_fd, recv_buf, 1000, 0);
> >         send(accept_fd, dot_buf, 500, 0);
> >         recv(client_fd, recv_buf, 500, 0);
> 
> I can take a look at switching it over.
> 
> > > +
> > > +     err = (int)(long)server_err;
> > > +     CHECK_FAIL(err);
> > > +
> > > +close_client_fd:
> > > +     close(client_fd);
> > > +close_server_fd:
> > > +     close(server_fd);
> > > +     return err;
> > > +}
> > > +
> > >  void test_tcpbpf_user(void)
> > >  {
> > >       const char *file = "test_tcpbpf_kern.o";
> > > @@ -74,7 +173,6 @@ void test_tcpbpf_user(void)
> > >       struct tcpbpf_globals g = {0};
> > >       struct bpf_object *obj;
> > >       int cg_fd = -1;
> > > -     int retry = 10;
> > >       __u32 key = 0;
> > >       int rv;
> > >
> > > @@ -94,11 +192,6 @@ void test_tcpbpf_user(void)
> > >               goto err;
> > >       }
> > >
> > > -     if (CHECK_FAIL(system("./tcp_server.py"))) {
> > > -             fprintf(stderr, "FAILED: TCP server\n");
> > > -             goto err;
> > > -     }
> > > -
> > >       map_fd = bpf_find_map(__func__, obj, "global_map");
> > >       if (CHECK_FAIL(map_fd < 0))
> > >               goto err;
> > > @@ -107,21 +200,17 @@ void test_tcpbpf_user(void)
> > >       if (CHECK_FAIL(sock_map_fd < 0))
> > >               goto err;
> > >
> > > -retry_lookup:
> > > +     if (run_test()) {
> > > +             fprintf(stderr, "FAILED: TCP server\n");
> > > +             goto err;
> > > +     }
> > > +
> > >       rv = bpf_map_lookup_elem(map_fd, &key, &g);
> > >       if (CHECK_FAIL(rv != 0)) {
> > CHECK() is a better one here if it needs to output error message.
> > The same goes for similar usages in this patch set.
> >
> > For the start_server() above which has already logged the error message,
> > CHECK_FAIL() is good enough.
> >
> > >               fprintf(stderr, "FAILED: bpf_map_lookup_elem returns %d\n", rv);
> > >               goto err;
> > >       }
> > >
> > > -     if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
> > It is good to have a solution to avoid a test depending on some number
> > of retries.
> >
> > After looking at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
> > it is not clear to me removing python alone is enough to avoid the
> > race (so the retry--).  One of the sk might still be in TCP_LAST_ACK
> > instead of TCP_CLOSE.
> >
> 
> After you pointed this out I decided to go back through and do some
> further testing. After testing this for several thousand iterations it
> does look like the issue can still happen, it was just significantly
> less frequent with the threaded approach, but it was still there. So I
> will go back through and add this back and then fold it into the
> verify_results function in the third patch. Although I might reduce
> the wait times as it seems like with the inline approach we only need
> in the 10s of microseconds instead of 100s for the sockets to close
> out.
I think this retry-and-wait can be avoided.  More on this...

> 
> > Also, when looking closer at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
> > it seems the map value "gp" is slapped together across multiple
> > TCP_CLOSE events which may be not easy to understand.
> >
> > How about it checks different states: TCP_CLOSE, TCP_LAST_ACK,
> > and BPF_TCP_FIN_WAIT2.  Each of this state will update its own
> > values under "gp".  Something like this (only compiler tested on
> > top of patch 4):
> >
> > diff --git i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > index 7e92c37976ac..65b247b03dfc 100644
> > --- i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > +++ w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > @@ -90,15 +90,14 @@ static void verify_result(int map_fd, int sock_map_fd)
> >               result.event_map, expected_events);
> >
> >         ASSERT_EQ(result.bytes_received, 501, "bytes_received");
> > -       ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
> > +       ASSERT_EQ(result.bytes_acked, 1001, "bytes_acked");
> >         ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
> >         ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
> >         ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
> >         ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
> > -       ASSERT_EQ(result.num_listen, 1, "num_listen");
> > -
> > -       /* 3 comes from one listening socket + both ends of the connection */
> > -       ASSERT_EQ(result.num_close_events, 3, "num_close_events");
> > +       ASSERT_EQ(result.num_listen_close, 1, "num_listen");
> > +       ASSERT_EQ(result.num_last_ack, 1, "num_last_ack");
> > +       ASSERT_EQ(result.num_fin_wait2, 1, "num_fin_wait2");
> >
> >         /* check setsockopt for SAVE_SYN */
> >         key = 0;
> > diff --git i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> > index 3e6912e4df3d..2c5ffb50d6e0 100644
> > --- i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> > +++ w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> > @@ -55,9 +55,11 @@ int bpf_testcb(struct bpf_sock_ops *skops)
> >  {
> >         char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
> >         struct bpf_sock_ops *reuse = skops;
> > +       struct tcpbpf_globals *gp;
> >         struct tcphdr *thdr;
> >         int good_call_rv = 0;
> >         int bad_call_rv = 0;
> > +       __u32 key_zero = 0;
> >         int save_syn = 1;
> >         int rv = -1;
> >         int v = 0;
> > @@ -155,26 +157,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
> >         case BPF_SOCK_OPS_RETRANS_CB:
> >                 break;
> >         case BPF_SOCK_OPS_STATE_CB:
> > -               if (skops->args[1] == BPF_TCP_CLOSE) {
> > -                       __u32 key = 0;
> > -                       struct tcpbpf_globals g, *gp;
> > -
> > -                       gp = bpf_map_lookup_elem(&global_map, &key);
> > -                       if (!gp)
> > -                               break;
> > -                       g = *gp;
> > -                       if (skops->args[0] == BPF_TCP_LISTEN) {
> > -                               g.num_listen++;
> > -                       } else {
> > -                               g.total_retrans = skops->total_retrans;
> > -                               g.data_segs_in = skops->data_segs_in;
> > -                               g.data_segs_out = skops->data_segs_out;
> > -                               g.bytes_received = skops->bytes_received;
> > -                               g.bytes_acked = skops->bytes_acked;
> > -                       }
> > -                       g.num_close_events++;
> > -                       bpf_map_update_elem(&global_map, &key, &g,
> > -                                           BPF_ANY);
> > +               gp = bpf_map_lookup_elem(&global_map, &key_zero);
> > +               if (!gp)
> > +                       break;
> > +               if (skops->args[1] == BPF_TCP_CLOSE &&
> > +                   skops->args[0] == BPF_TCP_LISTEN) {
> > +                       gp->num_listen_close++;
> > +               } else if (skops->args[1] == BPF_TCP_LAST_ACK) {
> > +                       gp->total_retrans = skops->total_retrans;
> > +                       gp->data_segs_in = skops->data_segs_in;
> > +                       gp->data_segs_out = skops->data_segs_out;
> > +                       gp->bytes_received = skops->bytes_received;
> > +                       gp->bytes_acked = skops->bytes_acked;
> > +                       gp->num_last_ack++;
> > +               } else if (skops->args[1] == BPF_TCP_FIN_WAIT2) {
> > +                       gp->num_fin_wait2++;
I meant with the above change in "case BPF_SOCK_OPS_STATE_CB".
The retry-and-wait in tcpbpf_user.c can be avoided.

What may still be needed in tcpbpf_user.c is to use shutdown and
read-zero to ensure the sk has gone through those states before
calling verify_result().  Something like this [ uncompiled code again :) ]:

        /* Always send FIN from accept_fd first to
         * ensure it will go through FIN_WAIT_2.
         */
        shutdown(accept_fd, SHUT_WR);
        /* Ensure client_fd gets the FIN */
        err = read(client_fd, buf, sizeof(buf));
        if (CHECK(err != 0, "read-after-shutdown(client_fd):",
                  "err:%d errno:%d\n", err, errno))
                goto close_accept_fd;

        /* FIN sends from client_fd and it must be in LAST_ACK now */
        shutdown(client_fd, SHUT_WR);
        /* Ensure accept_fd gets the FIN-ACK.
         * accept_fd must have passed the FIN_WAIT2.
         */
        err = read(accept_fd, buf, sizeof(buf));
        if (CHECK(err != 0, "read-after-shutdown(accept_fd):",
                  "err:%d errno:%d\n", err, errno))
                goto close_accept_fd;

	close(server_fd);
	close(accept_fd);
	close(client_fd);

	/* All sk has gone through the states being tested.
	 * check the results now.
	 */
	verify_result(map_fd, sock_map_fd);

> >                 }
> >                 break;
> >         case BPF_SOCK_OPS_TCP_LISTEN_CB:
> > diff --git i/tools/testing/selftests/bpf/test_tcpbpf.h w/tools/testing/selftests/bpf/test_tcpbpf.h
> > index 6220b95cbd02..0dec324ba4a6 100644
> > --- i/tools/testing/selftests/bpf/test_tcpbpf.h
> > +++ w/tools/testing/selftests/bpf/test_tcpbpf.h
> > @@ -12,7 +12,8 @@ struct tcpbpf_globals {
> >         __u32 good_cb_test_rv;
> >         __u64 bytes_received;
> >         __u64 bytes_acked;
> > -       __u32 num_listen;
> > -       __u32 num_close_events;
> > +       __u32 num_listen_close;
> > +       __u32 num_last_ack;
> > +       __u32 num_fin_wait2;
> >  };
> >  #endif
> 
> I can look at pulling this in and including it as a patch 5 if you
> would prefer. If I find any issues I will let you know.
> 
> > I also noticed the bytes_received/acked depends on the order of close(),
> > i.e. always close the accepted fd first.  I think a comment
> > in the tcpbpf_user.c is good enough for now.
> 
> Okay, I can add a comment explaining this.
> 
> > [ It does not have to be in this set and it can be done in another
> >   follow up effort.
> >   Instead of using a bpf map to store the result, using global
> >   variables in test_tcpbpf_kern.c will simplify the code further. ]
> 
> I assume this comment is about the changes to test_tcpbpf_kern.c? Just
> want to clarify as I assume this isn't about adding the comment about
> the socket closing order affecting the bytes_received/acked.
Right, it is unrelated to the "adding the comment about socket closing order".
It is about changing test_tcpbpf_kern.c and tcpbpf_user.c to
use global variables instead of bpf map to store results.
Again, it can be done later.  This can be used as an example:
b18c1f0aa477 ("bpf: selftest: Adapt sock_fields test to use skel and global variables")
Alexander Duyck Oct. 29, 2020, 7:51 p.m. UTC | #4
On Thu, Oct 29, 2020 at 11:13 AM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Thu, Oct 29, 2020 at 09:58:15AM -0700, Alexander Duyck wrote:
> [ ... ]
>

[...]

> >
> > > Also, when looking closer at BPF_SOCK_OPS_STATE_CB in test_tcpbpf_kern.c,
> > > it seems the map value "gp" is slapped together across multiple
> > > TCP_CLOSE events which may be not easy to understand.
> > >
> > > How about it checks different states: TCP_CLOSE, TCP_LAST_ACK,
> > > and BPF_TCP_FIN_WAIT2.  Each of this state will update its own
> > > values under "gp".  Something like this (only compiler tested on
> > > top of patch 4):
> > >
> > > diff --git i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > > index 7e92c37976ac..65b247b03dfc 100644
> > > --- i/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > > +++ w/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
> > > @@ -90,15 +90,14 @@ static void verify_result(int map_fd, int sock_map_fd)
> > >               result.event_map, expected_events);
> > >
> > >         ASSERT_EQ(result.bytes_received, 501, "bytes_received");
> > > -       ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
> > > +       ASSERT_EQ(result.bytes_acked, 1001, "bytes_acked");
> > >         ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
> > >         ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
> > >         ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
> > >         ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
> > > -       ASSERT_EQ(result.num_listen, 1, "num_listen");
> > > -
> > > -       /* 3 comes from one listening socket + both ends of the connection */
> > > -       ASSERT_EQ(result.num_close_events, 3, "num_close_events");
> > > +       ASSERT_EQ(result.num_listen_close, 1, "num_listen");
> > > +       ASSERT_EQ(result.num_last_ack, 1, "num_last_ack");
> > > +       ASSERT_EQ(result.num_fin_wait2, 1, "num_fin_wait2");
> > >
> > >         /* check setsockopt for SAVE_SYN */
> > >         key = 0;
> > > diff --git i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> > > index 3e6912e4df3d..2c5ffb50d6e0 100644
> > > --- i/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> > > +++ w/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
> > > @@ -55,9 +55,11 @@ int bpf_testcb(struct bpf_sock_ops *skops)
> > >  {
> > >         char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
> > >         struct bpf_sock_ops *reuse = skops;
> > > +       struct tcpbpf_globals *gp;
> > >         struct tcphdr *thdr;
> > >         int good_call_rv = 0;
> > >         int bad_call_rv = 0;
> > > +       __u32 key_zero = 0;
> > >         int save_syn = 1;
> > >         int rv = -1;
> > >         int v = 0;
> > > @@ -155,26 +157,21 @@ int bpf_testcb(struct bpf_sock_ops *skops)
> > >         case BPF_SOCK_OPS_RETRANS_CB:
> > >                 break;
> > >         case BPF_SOCK_OPS_STATE_CB:
> > > -               if (skops->args[1] == BPF_TCP_CLOSE) {
> > > -                       __u32 key = 0;
> > > -                       struct tcpbpf_globals g, *gp;
> > > -
> > > -                       gp = bpf_map_lookup_elem(&global_map, &key);
> > > -                       if (!gp)
> > > -                               break;
> > > -                       g = *gp;
> > > -                       if (skops->args[0] == BPF_TCP_LISTEN) {
> > > -                               g.num_listen++;
> > > -                       } else {
> > > -                               g.total_retrans = skops->total_retrans;
> > > -                               g.data_segs_in = skops->data_segs_in;
> > > -                               g.data_segs_out = skops->data_segs_out;
> > > -                               g.bytes_received = skops->bytes_received;
> > > -                               g.bytes_acked = skops->bytes_acked;
> > > -                       }
> > > -                       g.num_close_events++;
> > > -                       bpf_map_update_elem(&global_map, &key, &g,
> > > -                                           BPF_ANY);
> > > +               gp = bpf_map_lookup_elem(&global_map, &key_zero);
> > > +               if (!gp)
> > > +                       break;
> > > +               if (skops->args[1] == BPF_TCP_CLOSE &&
> > > +                   skops->args[0] == BPF_TCP_LISTEN) {
> > > +                       gp->num_listen_close++;
> > > +               } else if (skops->args[1] == BPF_TCP_LAST_ACK) {
> > > +                       gp->total_retrans = skops->total_retrans;
> > > +                       gp->data_segs_in = skops->data_segs_in;
> > > +                       gp->data_segs_out = skops->data_segs_out;
> > > +                       gp->bytes_received = skops->bytes_received;
> > > +                       gp->bytes_acked = skops->bytes_acked;
> > > +                       gp->num_last_ack++;
> > > +               } else if (skops->args[1] == BPF_TCP_FIN_WAIT2) {
> > > +                       gp->num_fin_wait2++;
> I meant with the above change in "case BPF_SOCK_OPS_STATE_CB".
> The retry-and-wait in tcpbpf_user.c can be avoided.
>
> What may still be needed in tcpbpf_user.c is to use shutdown and
> read-zero to ensure the sk has gone through those states before
> calling verify_result().  Something like this [ uncompiled code again :) ]:
>
>         /* Always send FIN from accept_fd first to
>          * ensure it will go through FIN_WAIT_2.
>          */
>         shutdown(accept_fd, SHUT_WR);
>         /* Ensure client_fd gets the FIN */
>         err = read(client_fd, buf, sizeof(buf));
>         if (CHECK(err != 0, "read-after-shutdown(client_fd):",
>                   "err:%d errno:%d\n", err, errno))
>                 goto close_accept_fd;
>
>         /* FIN sends from client_fd and it must be in LAST_ACK now */
>         shutdown(client_fd, SHUT_WR);
>         /* Ensure accept_fd gets the FIN-ACK.
>          * accept_fd must have passed the FIN_WAIT2.
>          */
>         err = read(accept_fd, buf, sizeof(buf));
>         if (CHECK(err != 0, "read-after-shutdown(accept_fd):",
>                   "err:%d errno:%d\n", err, errno))
>                 goto close_accept_fd;
>
>         close(server_fd);
>         close(accept_fd);
>         close(client_fd);
>
>         /* All sk has gone through the states being tested.
>          * check the results now.
>          */
>         verify_result(map_fd, sock_map_fd);

Okay. I think I see how that works then. Basically shutdown the write
on one end and read on the other expecting to hold until it forces us
out with a read of length 0 on the other end. Although I might just
use recv since that was the call being used to pull data from the
socket rather than read. I just need to make sure I perform it
starting with the shutdown on the accept end first so that it will
close first to avoid causing the received/acked to be swapped.

Thanks.

- Alex
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 5becab8b04e3..71ab82e37eb7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -1,14 +1,65 @@ 
 // SPDX-License-Identifier: GPL-2.0
 #include <inttypes.h>
 #include <test_progs.h>
+#include <network_helpers.h>
 
 #include "test_tcpbpf.h"
 #include "cgroup_helpers.h"
 
+#define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-user-test"
 
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS		3
+static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
+
+static void *server_thread(void *arg)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd = *(int *)arg;
+	char buf[1000];
+	int client_fd;
+	int err = 0;
+	int i;
+
+	err = listen(fd, 1);
+
+	pthread_mutex_lock(&server_started_mtx);
+	pthread_cond_signal(&server_started);
+	pthread_mutex_unlock(&server_started_mtx);
+
+	if (err < 0) {
+		perror("Failed to listen on socket");
+		err = errno;
+		goto err;
+	}
+
+	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+	if (client_fd < 0) {
+		perror("Failed to accept client");
+		err = errno;
+		goto err;
+	}
+
+	if (recv(client_fd, buf, 1000, 0) < 1000) {
+		perror("failed/partial recv");
+		err = errno;
+		goto out_clean;
+	}
+
+	for (i = 0; i < 500; i++)
+		buf[i] = '.';
+
+	if (send(client_fd, buf, 500, 0) < 500) {
+		perror("failed/partial send");
+		err = errno;
+		goto out_clean;
+	}
+out_clean:
+	close(client_fd);
+err:
+	return (void *)(long)err;
+}
 
 #define EXPECT_EQ(expected, actual, fmt)			\
 	do {							\
@@ -43,7 +94,9 @@  int verify_result(const struct tcpbpf_globals *result)
 	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
 	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
 	EXPECT_EQ(1, result->num_listen, PRIu32);
-	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
+
+	/* 3 comes from one listening socket + both ends of the connection */
+	EXPECT_EQ(3, result->num_close_events, PRIu32);
 
 	return ret;
 }
@@ -67,6 +120,52 @@  int verify_sockopt_result(int sock_map_fd)
 	return ret;
 }
 
+static int run_test(void)
+{
+	int server_fd, client_fd;
+	void *server_err;
+	char buf[1000];
+	pthread_t tid;
+	int err = -1;
+	int i;
+
+	server_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK_FAIL(server_fd < 0))
+		return err;
+
+	pthread_mutex_lock(&server_started_mtx);
+	if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
+				      (void *)&server_fd)))
+		goto close_server_fd;
+
+	pthread_cond_wait(&server_started, &server_started_mtx);
+	pthread_mutex_unlock(&server_started_mtx);
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (client_fd < 0)
+		goto close_server_fd;
+
+	for (i = 0; i < 1000; i++)
+		buf[i] = '+';
+
+	if (CHECK_FAIL(send(client_fd, buf, 1000, 0) < 1000))
+		goto close_client_fd;
+
+	if (CHECK_FAIL(recv(client_fd, buf, 500, 0) < 500))
+		goto close_client_fd;
+
+	pthread_join(tid, &server_err);
+
+	err = (int)(long)server_err;
+	CHECK_FAIL(err);
+
+close_client_fd:
+	close(client_fd);
+close_server_fd:
+	close(server_fd);
+	return err;
+}
+
 void test_tcpbpf_user(void)
 {
 	const char *file = "test_tcpbpf_kern.o";
@@ -74,7 +173,6 @@  void test_tcpbpf_user(void)
 	struct tcpbpf_globals g = {0};
 	struct bpf_object *obj;
 	int cg_fd = -1;
-	int retry = 10;
 	__u32 key = 0;
 	int rv;
 
@@ -94,11 +192,6 @@  void test_tcpbpf_user(void)
 		goto err;
 	}
 
-	if (CHECK_FAIL(system("./tcp_server.py"))) {
-		fprintf(stderr, "FAILED: TCP server\n");
-		goto err;
-	}
-
 	map_fd = bpf_find_map(__func__, obj, "global_map");
 	if (CHECK_FAIL(map_fd < 0))
 		goto err;
@@ -107,21 +200,17 @@  void test_tcpbpf_user(void)
 	if (CHECK_FAIL(sock_map_fd < 0))
 		goto err;
 
-retry_lookup:
+	if (run_test()) {
+		fprintf(stderr, "FAILED: TCP server\n");
+		goto err;
+	}
+
 	rv = bpf_map_lookup_elem(map_fd, &key, &g);
 	if (CHECK_FAIL(rv != 0)) {
 		fprintf(stderr, "FAILED: bpf_map_lookup_elem returns %d\n", rv);
 		goto err;
 	}
 
-	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
-		fprintf(stderr,
-			"Unexpected number of close events (%d), retrying!\n",
-			g.num_close_events);
-		usleep(100);
-		goto retry_lookup;
-	}
-
 	if (CHECK_FAIL(verify_result(&g))) {
 		fprintf(stderr, "FAILED: Wrong stats\n");
 		goto err;
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
deleted file mode 100755
index bfff82be3fc1..000000000000
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ /dev/null
@@ -1,50 +0,0 @@ 
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-serverPort = int(sys.argv[1])
-
-# create active socket
-sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try:
-    sock.connect(('::1', serverPort))
-except socket.error as e:
-    sys.exit(1)
-
-buf = b''
-n = 0
-while n < 1000:
-    buf += b'+'
-    n += 1
-
-sock.settimeout(1);
-n = send(sock, buf)
-n = read(sock, 500)
-sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
deleted file mode 100755
index 42ab8882f00f..000000000000
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ /dev/null
@@ -1,80 +0,0 @@ 
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-SERVER_PORT = 12877
-MAX_PORTS = 2
-
-serverPort = SERVER_PORT
-serverSocket = None
-
-# create passive socket
-serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-
-try: serverSocket.bind(('::1', 0))
-except socket.error as msg:
-    print('bind fails: ' + str(msg))
-
-sn = serverSocket.getsockname()
-serverPort = sn[1]
-
-cmdStr = ("./tcp_client.py %d &") % (serverPort)
-os.system(cmdStr)
-
-buf = b''
-n = 0
-while n < 500:
-    buf += b'.'
-    n += 1
-
-serverSocket.listen(MAX_PORTS)
-readList = [serverSocket]
-
-while True:
-    readyRead, readyWrite, inError = \
-        select.select(readList, [], [], 2)
-
-    if len(readyRead) > 0:
-        waitCount = 0
-        for sock in readyRead:
-            if sock == serverSocket:
-                (clientSocket, address) = serverSocket.accept()
-                address = str(address[0])
-                readList.append(clientSocket)
-            else:
-                sock.settimeout(1);
-                s = read(sock, 1000)
-                n = send(sock, buf)
-                sock.close()
-                serverSocket.close()
-                sys.exit(0)
-    else:
-        print('Select timeout!')
-        sys.exit(1)