diff mbox series

selftests: fix OOM problem in msg_zerocopy selftest

Message ID 20240701202338.2806388-1-zijianzhang@bytedance.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series selftests: fix OOM problem in msg_zerocopy selftest | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 4 maintainers not CCed: kuba@kernel.org shuah@kernel.org linux-kselftest@vger.kernel.org pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-07-02--06-00 (tests: 664)

Commit Message

Zijian Zhang July 1, 2024, 8:23 p.m. UTC
From: Zijian Zhang <zijianzhang@bytedance.com>

In selftests/net/msg_zerocopy.c, it has a while loop keeps calling sendmsg
on a socket with MSG_ZEROCOPY flag, and it will recv the notifications
until the socket is not writable. Typically, it will start the receiving
process after around 30+ sendmsgs. However, because of the commit
dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale") the sender is
always writable and does not get any chance to run recv notifications.
The selftest always exits with OUT_OF_MEMORY because the memory used by
opt_skb exceeds the core.sysctl_optmem_max.

We introduce "cfg_notification_limit" to force sender to receive
notifications after some number of sendmsgs. And, notifications may not
come in order, because of the reason we present above. We have order
checking code managed by cfg_verbose.

Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
---
 tools/testing/selftests/net/msg_zerocopy.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Comments

Willem de Bruijn July 1, 2024, 9:22 p.m. UTC | #1
zijianzhang@ wrote:
> From: Zijian Zhang <zijianzhang@bytedance.com>


Remember to append to PATCH net or net-next in the subject line.

Since the title has fix in it, I suppose this should go to net.

As this is a test adjustment, I don't think it should go to stable.
Still, fixes need a Fixes: tag. The below referenced commit is not the
cause. Likely that sysctl could be set to a different value to trigger
this on older kernels too.

This has likely been present since the start of the test, so

Fixes: 07b65c5b31ce ("test: add msg_zerocopy test")

> In selftests/net/msg_zerocopy.c, it has a while loop keeps calling sendmsg
> on a socket with MSG_ZEROCOPY flag, and it will recv the notifications
> until the socket is not writable. Typically, it will start the receiving
> process after around 30+ sendmsgs. However, because of the commit
> dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale") the sender is
> always writable and does not get any chance to run recv notifications.
> The selftest always exits with OUT_OF_MEMORY because the memory used by
> opt_skb exceeds the core.sysctl_optmem_max.

Regardless of how large you set this sysctl, right? It is suggested to
increase it to at least 128KB.

> We introduce "cfg_notification_limit" to force sender to receive
> notifications after some number of sendmsgs. And, notifications may not
> come in order, because of the reason we present above.

Which reason?

> We have order
> checking code managed by cfg_verbose.
> 
> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
> ---
>  tools/testing/selftests/net/msg_zerocopy.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
> index bdc03a2097e8..7ea5fb28c93d 100644
> --- a/tools/testing/selftests/net/msg_zerocopy.c
> +++ b/tools/testing/selftests/net/msg_zerocopy.c
> @@ -85,6 +85,7 @@ static bool cfg_rx;
>  static int  cfg_runtime_ms	= 4200;
>  static int  cfg_verbose;
>  static int  cfg_waittime_ms	= 500;
> +static int  cfg_notification_limit = 32;
>  static bool cfg_zerocopy;
>  
>  static socklen_t cfg_alen;
> @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET];
>  static long packets, bytes, completions, expected_completions;
>  static int  zerocopied = -1;
>  static uint32_t next_completion;
> +static uint32_t sends_since_notify;
>  
>  static unsigned long gettimeofday_ms(void)
>  {
> @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
>  		error(1, errno, "send");
>  	if (cfg_verbose && ret != len)
>  		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
> +	sends_since_notify++;
>  
>  	if (len) {
>  		packets++;
> @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain)
>  	/* Detect notification gaps. These should not happen often, if at all.
>  	 * Gaps can occur due to drops, reordering and retransmissions.
>  	 */
> -	if (lo != next_completion)
> +	if (cfg_verbose && lo != next_completion)
>  		fprintf(stderr, "gap: %u..%u does not append to %u\n",
>  			lo, hi, next_completion);
>  	next_completion = hi + 1;
> @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain)
>  static void do_recv_completions(int fd, int domain)
>  {
>  	while (do_recv_completion(fd, domain)) {}
> +	sends_since_notify = 0;
>  }
>  
>  /* Wait for all remaining completions on the errqueue */
> @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol)
>  		else
>  			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
>  
> +		if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
> +			do_recv_completions(fd, domain);
> +
>  		while (!do_poll(fd, POLLOUT)) {
>  			if (cfg_zerocopy)
>  				do_recv_completions(fd, domain);
> @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv)
>  
>  	cfg_payload_len = max_payload_len;
>  
> -	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
> +	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
>  		switch (c) {
>  		case '4':
>  			if (cfg_family != PF_UNSPEC)
> @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv)
>  			if (cfg_ifindex == 0)
>  				error(1, errno, "invalid iface: %s", optarg);
>  			break;
> +		case 'l':
> +			cfg_notification_limit = strtoul(optarg, NULL, 0);
> +			break;
>  		case 'm':
>  			cfg_cork_mixed = true;
>  			break;
> -- 
> 2.20.1
>
Zijian Zhang July 1, 2024, 9:51 p.m. UTC | #2
On 7/1/24 2:22 PM, Willem de Bruijn wrote:
> zijianzhang@ wrote:
>> From: Zijian Zhang <zijianzhang@bytedance.com>
> 
> 
> Remember to append to PATCH net or net-next in the subject line.
> 
> Since the title has fix in it, I suppose this should go to net.
> 
> As this is a test adjustment, I don't think it should go to stable.
> Still, fixes need a Fixes: tag. The below referenced commit is not the
> cause. Likely that sysctl could be set to a different value to trigger
> this on older kernels too.
> 
> This has likely been present since the start of the test, so
> 
> Fixes: 07b65c5b31ce ("test: add msg_zerocopy test")
> 

My ignorance, thanks for pointing this out!

>> In selftests/net/msg_zerocopy.c, it has a while loop keeps calling sendmsg
>> on a socket with MSG_ZEROCOPY flag, and it will recv the notifications
>> until the socket is not writable. Typically, it will start the receiving
>> process after around 30+ sendmsgs. However, because of the commit
>> dfa2f0483360 ("tcp: get rid of sysctl_tcp_adv_win_scale") the sender is
>> always writable and does not get any chance to run recv notifications.
>> The selftest always exits with OUT_OF_MEMORY because the memory used by
>> opt_skb exceeds the core.sysctl_optmem_max.
> 
> Regardless of how large you set this sysctl, right? It is suggested to
> increase it to at least 128KB.
> 

Just retested, even though I set net.core.optmem_max to 128k+, the
problem still exists.

>> We introduce "cfg_notification_limit" to force sender to receive
>> notifications after some number of sendmsgs. And, notifications may not
>> come in order, because of the reason we present above.
> 
> Which reason?
> 

If I open the Lock debugging config, the notifications will be reordered.

>> We have order
>> checking code managed by cfg_verbose.
>>
>> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
>> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
>> ---
>>   tools/testing/selftests/net/msg_zerocopy.c | 14 ++++++++++++--
>>   1 file changed, 12 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
>> index bdc03a2097e8..7ea5fb28c93d 100644
>> --- a/tools/testing/selftests/net/msg_zerocopy.c
>> +++ b/tools/testing/selftests/net/msg_zerocopy.c
>> @@ -85,6 +85,7 @@ static bool cfg_rx;
>>   static int  cfg_runtime_ms	= 4200;
>>   static int  cfg_verbose;
>>   static int  cfg_waittime_ms	= 500;
>> +static int  cfg_notification_limit = 32;
>>   static bool cfg_zerocopy;
>>   
>>   static socklen_t cfg_alen;
>> @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET];
>>   static long packets, bytes, completions, expected_completions;
>>   static int  zerocopied = -1;
>>   static uint32_t next_completion;
>> +static uint32_t sends_since_notify;
>>   
>>   static unsigned long gettimeofday_ms(void)
>>   {
>> @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
>>   		error(1, errno, "send");
>>   	if (cfg_verbose && ret != len)
>>   		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
>> +	sends_since_notify++;
>>   
>>   	if (len) {
>>   		packets++;
>> @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain)
>>   	/* Detect notification gaps. These should not happen often, if at all.
>>   	 * Gaps can occur due to drops, reordering and retransmissions.
>>   	 */
>> -	if (lo != next_completion)
>> +	if (cfg_verbose && lo != next_completion)
>>   		fprintf(stderr, "gap: %u..%u does not append to %u\n",
>>   			lo, hi, next_completion);
>>   	next_completion = hi + 1;
>> @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain)
>>   static void do_recv_completions(int fd, int domain)
>>   {
>>   	while (do_recv_completion(fd, domain)) {}
>> +	sends_since_notify = 0;
>>   }
>>   
>>   /* Wait for all remaining completions on the errqueue */
>> @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol)
>>   		else
>>   			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
>>   
>> +		if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
>> +			do_recv_completions(fd, domain);
>> +
>>   		while (!do_poll(fd, POLLOUT)) {
>>   			if (cfg_zerocopy)
>>   				do_recv_completions(fd, domain);
>> @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv)
>>   
>>   	cfg_payload_len = max_payload_len;
>>   
>> -	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
>> +	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
>>   		switch (c) {
>>   		case '4':
>>   			if (cfg_family != PF_UNSPEC)
>> @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv)
>>   			if (cfg_ifindex == 0)
>>   				error(1, errno, "invalid iface: %s", optarg);
>>   			break;
>> +		case 'l':
>> +			cfg_notification_limit = strtoul(optarg, NULL, 0);
>> +			break;
>>   		case 'm':
>>   			cfg_cork_mixed = true;
>>   			break;
>> -- 
>> 2.20.1
>>
> 
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index bdc03a2097e8..7ea5fb28c93d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -85,6 +85,7 @@  static bool cfg_rx;
 static int  cfg_runtime_ms	= 4200;
 static int  cfg_verbose;
 static int  cfg_waittime_ms	= 500;
+static int  cfg_notification_limit = 32;
 static bool cfg_zerocopy;
 
 static socklen_t cfg_alen;
@@ -95,6 +96,7 @@  static char payload[IP_MAXPACKET];
 static long packets, bytes, completions, expected_completions;
 static int  zerocopied = -1;
 static uint32_t next_completion;
+static uint32_t sends_since_notify;
 
 static unsigned long gettimeofday_ms(void)
 {
@@ -208,6 +210,7 @@  static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 		error(1, errno, "send");
 	if (cfg_verbose && ret != len)
 		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+	sends_since_notify++;
 
 	if (len) {
 		packets++;
@@ -435,7 +438,7 @@  static bool do_recv_completion(int fd, int domain)
 	/* Detect notification gaps. These should not happen often, if at all.
 	 * Gaps can occur due to drops, reordering and retransmissions.
 	 */
-	if (lo != next_completion)
+	if (cfg_verbose && lo != next_completion)
 		fprintf(stderr, "gap: %u..%u does not append to %u\n",
 			lo, hi, next_completion);
 	next_completion = hi + 1;
@@ -460,6 +463,7 @@  static bool do_recv_completion(int fd, int domain)
 static void do_recv_completions(int fd, int domain)
 {
 	while (do_recv_completion(fd, domain)) {}
+	sends_since_notify = 0;
 }
 
 /* Wait for all remaining completions on the errqueue */
@@ -549,6 +553,9 @@  static void do_tx(int domain, int type, int protocol)
 		else
 			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
 
+		if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
+			do_recv_completions(fd, domain);
+
 		while (!do_poll(fd, POLLOUT)) {
 			if (cfg_zerocopy)
 				do_recv_completions(fd, domain);
@@ -708,7 +715,7 @@  static void parse_opts(int argc, char **argv)
 
 	cfg_payload_len = max_payload_len;
 
-	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -736,6 +743,9 @@  static void parse_opts(int argc, char **argv)
 			if (cfg_ifindex == 0)
 				error(1, errno, "invalid iface: %s", optarg);
 			break;
+		case 'l':
+			cfg_notification_limit = strtoul(optarg, NULL, 0);
+			break;
 		case 'm':
 			cfg_cork_mixed = true;
 			break;