diff mbox series

[bpf-next,v4,3/3] selftests/bpf: Support nonblock for send_recv_data

Message ID 9cd358958245f8ec87c4f553779aa4243f967a2f.1712729342.git.tanggeliang@kylinos.cn (mailing list archive)
State Handled Elsewhere, archived
Delegated to: Geliang Tang
Headers show
Series export send_recv_data | expand

Checks

Context Check Description
matttbe/KVM_Validation__normal success Success! ✅
matttbe/KVM_Validation__debug success Success! ✅
matttbe/KVM_Validation__btf__only_bpftest_all_ success Success! ✅
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 34 lines checked
matttbe/shellcheck success MPTCP selftests files have not been modified

Commit Message

Geliang Tang April 10, 2024, 6:13 a.m. UTC
From: Geliang Tang <tanggeliang@kylinos.cn>

Some tests, such as the MPTCP bpf tests, require send_recv_data helper
to run in nonblock mode.

This patch adds nonblock support for send_recv_data(). Check whether the
socket is currently in nonblock mode, and if so, retry on EWOULDBLOCK
instead of aborting, so that sending and receiving can continue.

Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Martin KaFai Lau April 10, 2024, 9:34 p.m. UTC | #1
On 4/9/24 11:13 PM, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> Some tests, such as the MPTCP bpf tests, require send_recv_data helper
> to run in nonblock mode.
> 
> This patch adds nonblock support for send_recv_data(). Check if it is
> currently in nonblock mode, and if so, ignore EWOULDBLOCK to continue
> sending and receiving.
> 
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>   tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
>   1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
> index 137cd18ef3f2..ca16ef2b648e 100644
> --- a/tools/testing/selftests/bpf/network_helpers.c
> +++ b/tools/testing/selftests/bpf/network_helpers.c
> @@ -555,6 +555,7 @@ struct send_recv_arg {
>   static void *send_recv_server(void *arg)
>   {
>   	struct send_recv_arg *a = (struct send_recv_arg *)arg;
> +	int flags = fcntl(a->fd, F_GETFL);
>   	ssize_t nr_sent = 0, bytes = 0;
>   	char batch[1500];
>   	int err = 0, fd;
> @@ -578,6 +579,8 @@ static void *send_recv_server(void *arg)
>   		if (nr_sent == -1 && errno == EINTR)
>   			continue;
>   		if (nr_sent == -1) {
> +			if (flags & O_NONBLOCK && errno == EWOULDBLOCK)

I still don't see why it needs to be a non blocking IO. mptcp should work
with blocking IO also, no? Does it really need non blocking IO to make
mptcp test work? I would rather stay with blocking IO in selftest as much as
possible for simplicity reason.

I am afraid the root cause of the EAGAIN thread has not been figured out yet:
https://lore.kernel.org/all/b3943f9a8bf595212b00e96ba850bf32893312cc.camel@kernel.org/

Lets drop patch 3 until it is understood why mptcp needs EAGAIN or non-blocking IO.
It feels like there is some flakiness and it should be understood and avoided.

Other than the comment in patch 2, the first two patches lgtm. Please respin with
the first two patches.

> +				continue;
>   			err = -errno;
>   			break;
>   		}
> @@ -599,6 +602,7 @@ static void *send_recv_server(void *arg)
>   
>   int send_recv_data(int lfd, int fd, uint32_t total_bytes)
>   {
> +	int flags = fcntl(lfd, F_GETFL);
>   	ssize_t nr_recv = 0, bytes = 0;
>   	struct send_recv_arg arg = {
>   		.fd	= lfd,
> @@ -622,8 +626,11 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
>   			       MIN(total_bytes - bytes, sizeof(batch)), 0);
>   		if (nr_recv == -1 && errno == EINTR)
>   			continue;
> -		if (nr_recv == -1)
> +		if (nr_recv == -1) {
> +			if (flags & O_NONBLOCK && errno == EWOULDBLOCK)
> +				continue;
>   			break;
> +		}
>   		bytes += nr_recv;
>   	}
>
Geliang Tang April 11, 2024, 6:52 a.m. UTC | #2
mptcp-only

Hi Matt & Mat,

On Wed, 2024-04-10 at 14:34 -0700, Martin KaFai Lau wrote:
> On 4/9/24 11:13 PM, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > Some tests, such as the MPTCP bpf tests, require send_recv_data
> > helper
> > to run in nonblock mode.
> > 
> > This patch adds nonblock support for send_recv_data(). Check if it
> > is
> > currently in nonblock mode, and if so, ignore EWOULDBLOCK to
> > continue
> > sending and receiving.
> > 
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >   tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
> >   1 file changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/testing/selftests/bpf/network_helpers.c
> > b/tools/testing/selftests/bpf/network_helpers.c
> > index 137cd18ef3f2..ca16ef2b648e 100644
> > --- a/tools/testing/selftests/bpf/network_helpers.c
> > +++ b/tools/testing/selftests/bpf/network_helpers.c
> > @@ -555,6 +555,7 @@ struct send_recv_arg {
> >   static void *send_recv_server(void *arg)
> >   {
> >   	struct send_recv_arg *a = (struct send_recv_arg *)arg;
> > +	int flags = fcntl(a->fd, F_GETFL);
> >   	ssize_t nr_sent = 0, bytes = 0;
> >   	char batch[1500];
> >   	int err = 0, fd;
> > @@ -578,6 +579,8 @@ static void *send_recv_server(void *arg)
> >   		if (nr_sent == -1 && errno == EINTR)
> >   			continue;
> >   		if (nr_sent == -1) {
> > +			if (flags & O_NONBLOCK && errno ==
> > EWOULDBLOCK)
> 
> I still don't see why it needs to be a non blocking IO. mptcp should
> work
> with blocking IO also, no? Does it really need non blocking IO to
> make
> mptcp test work? I would rather stay with blocking IO in selftest as
> much as
> possible for simplicity reason.

I need some help here.

This issue is reported by Matt in "CI: MPTCP BPF tests are now
validated", and my fixes ([1] and this patch) aren't accepted by
Martin. Is it normal to get EAGAINs in this case? Please give some
suggestions.

[1]
https://patchwork.kernel.org/project/mptcp/patch/311e074a3ca0465bdc5e4c2283e334bae5ccd306.1711296000.git.tanggeliang@kylinos.cn/

Thanks,
-Geliang

> 
> I am afraid the root cause of the EAGAIN thread has not been figured
> out yet:
> https://lore.kernel.org/all/b3943f9a8bf595212b00e96ba850bf32893312cc.camel@kernel.org/
> 
> Lets drop patch 3 until it is understood why mptcp needs EAGAIN or
> non-blocking IO.
> It feels like there is some flakiness and it should be understood and
> avoided.
> 
> Other than the comment in patch 2, the first two patches lgtm. Please
> respin with
> the first two patches.
> 
> > +				continue;
> >   			err = -errno;
> >   			break;
> >   		}
> > @@ -599,6 +602,7 @@ static void *send_recv_server(void *arg)
> >   
> >   int send_recv_data(int lfd, int fd, uint32_t total_bytes)
> >   {
> > +	int flags = fcntl(lfd, F_GETFL);
> >   	ssize_t nr_recv = 0, bytes = 0;
> >   	struct send_recv_arg arg = {
> >   		.fd	= lfd,
> > @@ -622,8 +626,11 @@ int send_recv_data(int lfd, int fd, uint32_t
> > total_bytes)
> >   			       MIN(total_bytes - bytes,
> > sizeof(batch)), 0);
> >   		if (nr_recv == -1 && errno == EINTR)
> >   			continue;
> > -		if (nr_recv == -1)
> > +		if (nr_recv == -1) {
> > +			if (flags & O_NONBLOCK && errno ==
> > EWOULDBLOCK)
> > +				continue;
> >   			break;
> > +		}
> >   		bytes += nr_recv;
> >   	}
> >   
> 
>
Geliang Tang April 22, 2024, 6:50 a.m. UTC | #3
On Thu, 2024-04-11 at 14:52 +0800, Geliang Tang wrote:
> mptcp-only
> 
> Hi Matt & Mat,
> 
> On Wed, 2024-04-10 at 14:34 -0700, Martin KaFai Lau wrote:
> > On 4/9/24 11:13 PM, Geliang Tang wrote:
> > > From: Geliang Tang <tanggeliang@kylinos.cn>
> > > 
> > > Some tests, such as the MPTCP bpf tests, require send_recv_data
> > > helper
> > > to run in nonblock mode.
> > > 
> > > This patch adds nonblock support for send_recv_data(). Check if
> > > it
> > > is
> > > currently in nonblock mode, and if so, ignore EWOULDBLOCK to
> > > continue
> > > sending and receiving.
> > > 
> > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > > ---
> > >   tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
> > >   1 file changed, 8 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/tools/testing/selftests/bpf/network_helpers.c
> > > b/tools/testing/selftests/bpf/network_helpers.c
> > > index 137cd18ef3f2..ca16ef2b648e 100644
> > > --- a/tools/testing/selftests/bpf/network_helpers.c
> > > +++ b/tools/testing/selftests/bpf/network_helpers.c
> > > @@ -555,6 +555,7 @@ struct send_recv_arg {
> > >   static void *send_recv_server(void *arg)
> > >   {
> > >   	struct send_recv_arg *a = (struct send_recv_arg *)arg;
> > > +	int flags = fcntl(a->fd, F_GETFL);
> > >   	ssize_t nr_sent = 0, bytes = 0;
> > >   	char batch[1500];
> > >   	int err = 0, fd;
> > > @@ -578,6 +579,8 @@ static void *send_recv_server(void *arg)
> > >   		if (nr_sent == -1 && errno == EINTR)
> > >   			continue;
> > >   		if (nr_sent == -1) {
> > > +			if (flags & O_NONBLOCK && errno ==
> > > EWOULDBLOCK)
> > 
> > I still don't see why it needs to be a non blocking IO. mptcp
> > should
> > work
> > with blocking IO also, no? Does it really need non blocking IO to
> > make
> > mptcp test work? I would rather stay with blocking IO in selftest
> > as
> > much as
> > possible for simplicity reason.
> 
> I need some help here.
> 
> This issue is reported by Matt in "CI: MPTCP BPF tests are now
> validated", and my fixes ([1] and this patch) aren't accepted by
> Martin. Is it normal to get EAGAINs in this case? Please give some
> suggestions.

It fails in mptcp_sendmsg()

1898 wait_for_memory:
1899                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1900                 __mptcp_push_pending(sk, msg->msg_flags);
1901                 ret = sk_stream_wait_memory(sk, &timeo);
1902                 if (ret)
1903                         goto do_error;
1904         }

sk_stream_wait_memory() fails in this case, and
tcp_rtx_and_write_queues_empty(sk) is true.

I opened issue #487 to track this problem.

Thanks,
-Geliang

> 
> [1]
> https://patchwork.kernel.org/project/mptcp/patch/311e074a3ca0465bdc5e4c2283e334bae5ccd306.1711296000.git.tanggeliang@kylinos.cn/
> 
> Thanks,
> -Geliang
> 
> > 
> > I am afraid the root cause of the EAGAIN thread has not been
> > figured
> > out yet:
> > https://lore.kernel.org/all/b3943f9a8bf595212b00e96ba850bf32893312cc.camel@kernel.org/
> > 
> > Lets drop patch 3 until it is understood why mptcp needs EAGAIN or
> > non-blocking IO.
> > It feels like there is some flakiness and it should be understood
> > and
> > avoided.
> > 
> > Other than the comment in patch 2, the first two patches lgtm.
> > Please
> > respin with
> > the first two patches.
> > 
> > > +				continue;
> > >   			err = -errno;
> > >   			break;
> > >   		}
> > > @@ -599,6 +602,7 @@ static void *send_recv_server(void *arg)
> > >   
> > >   int send_recv_data(int lfd, int fd, uint32_t total_bytes)
> > >   {
> > > +	int flags = fcntl(lfd, F_GETFL);
> > >   	ssize_t nr_recv = 0, bytes = 0;
> > >   	struct send_recv_arg arg = {
> > >   		.fd	= lfd,
> > > @@ -622,8 +626,11 @@ int send_recv_data(int lfd, int fd, uint32_t
> > > total_bytes)
> > >   			       MIN(total_bytes - bytes,
> > > sizeof(batch)), 0);
> > >   		if (nr_recv == -1 && errno == EINTR)
> > >   			continue;
> > > -		if (nr_recv == -1)
> > > +		if (nr_recv == -1) {
> > > +			if (flags & O_NONBLOCK && errno ==
> > > EWOULDBLOCK)
> > > +				continue;
> > >   			break;
> > > +		}
> > >   		bytes += nr_recv;
> > >   	}
> > >   
> > 
> > 
>
Matthieu Baerts (NGI0) April 22, 2024, 9:45 a.m. UTC | #4
Hi Geliang,

On 22/04/2024 08:50, Geliang Tang wrote:
> On Thu, 2024-04-11 at 14:52 +0800, Geliang Tang wrote:
>> mptcp-only
>>
>> Hi Matt & Mat,
>>
>> On Wed, 2024-04-10 at 14:34 -0700, Martin KaFai Lau wrote:
>>> On 4/9/24 11:13 PM, Geliang Tang wrote:
>>>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>>>
>>>> Some tests, such as the MPTCP bpf tests, require send_recv_data
>>>> helper
>>>> to run in nonblock mode.
>>>>
>>>> This patch adds nonblock support for send_recv_data(). Check if
>>>> it
>>>> is
>>>> currently in nonblock mode, and if so, ignore EWOULDBLOCK to
>>>> continue
>>>> sending and receiving.
>>>>
>>>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>>>> ---
>>>>   tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
>>>>   1 file changed, 8 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/tools/testing/selftests/bpf/network_helpers.c
>>>> b/tools/testing/selftests/bpf/network_helpers.c
>>>> index 137cd18ef3f2..ca16ef2b648e 100644
>>>> --- a/tools/testing/selftests/bpf/network_helpers.c
>>>> +++ b/tools/testing/selftests/bpf/network_helpers.c
>>>> @@ -555,6 +555,7 @@ struct send_recv_arg {
>>>>   static void *send_recv_server(void *arg)
>>>>   {
>>>>   	struct send_recv_arg *a = (struct send_recv_arg *)arg;
>>>> +	int flags = fcntl(a->fd, F_GETFL);
>>>>   	ssize_t nr_sent = 0, bytes = 0;
>>>>   	char batch[1500];
>>>>   	int err = 0, fd;
>>>> @@ -578,6 +579,8 @@ static void *send_recv_server(void *arg)
>>>>   		if (nr_sent == -1 && errno == EINTR)
>>>>   			continue;
>>>>   		if (nr_sent == -1) {
>>>> +			if (flags & O_NONBLOCK && errno ==
>>>> EWOULDBLOCK)
>>>
>>> I still don't see why it needs to be a non blocking IO. mptcp
>>> should
>>> work
>>> with blocking IO also, no? Does it really need non blocking IO to
>>> make
>>> mptcp test work? I would rather stay with blocking IO in selftest
>>> as
>>> much as
>>> possible for simplicity reason.
>>
>> I need some help here.
>>
>> This issue is reported by Matt in "CI: MPTCP BPF tests are now
>> validated", and my fixes ([1] and this patch) aren't accepted by
>> Martin. Is it normal to get EAGAINs in this case? Please give some
>> suggestions.

Thank you for the notification, I missed your previous question.

Regarding Martin's comment about O_NONBLOCK, I understand that with the
current BPF selftests -- so excluding the ones linked to the MPTCP
schedulers -- it is not needed to support O_NONBLOCK.

It looks like it is too early to do such modifications (patch 3), and
probably better to do MPTCP specific modifications only in our tree for
the moment. When upstreaming the new MPTCP BPF selftests, it will be
clearer for BPF maintainers what are our requirements, and such
modifications would make more sense, no?

> It fails in mptcp_sendmsg()
> 
> 1898 wait_for_memory:
> 1899                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
> 1900                 __mptcp_push_pending(sk, msg->msg_flags);
> 1901                 ret = sk_stream_wait_memory(sk, &timeo);
> 1902                 if (ret)
> 1903                         goto do_error;
> 1904         }
> 
> sk_stream_wait_memory() fails in the case, and
> tcp_rtx_and_write_queues_empty(sk) is true.

My understanding is that with the current BPF selftests -- so excluding
the ones linked to the MPTCP schedulers here as well -- it looks
unlikely to have a EAGAIN errno, because only blocking IO is being used,
right?

I might be wrong, but with MPTCP schedulers, it is different: with
non-blocking IO, EAGAIN can be seen, and this case needs to be handled.
Actively waiting by retrying directly in the loop in case of EAGAIN is
not recommended, but probably OK for the tests (still, might be good to
add a comment to recommend polling instead). So here as well, such
modifications can stay in our tree for the moment.

Also, what is not clear to me is where we set the socket as the
non-blocking one. Is it only done for MPTCP servers/clients?

> I added a issue #487 to trace this issue.

Thanks!

Cheers,
Matt
Geliang Tang April 22, 2024, 10:04 a.m. UTC | #5
Hi Matt,

On Mon, 2024-04-22 at 11:45 +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 22/04/2024 08:50, Geliang Tang wrote:
> > On Thu, 2024-04-11 at 14:52 +0800, Geliang Tang wrote:
> > > mptcp-only
> > > 
> > > Hi Matt & Mat,
> > > 
> > > On Wed, 2024-04-10 at 14:34 -0700, Martin KaFai Lau wrote:
> > > > On 4/9/24 11:13 PM, Geliang Tang wrote:
> > > > > From: Geliang Tang <tanggeliang@kylinos.cn>
> > > > > 
> > > > > Some tests, such as the MPTCP bpf tests, require
> > > > > send_recv_data
> > > > > helper
> > > > > to run in nonblock mode.
> > > > > 
> > > > > This patch adds nonblock support for send_recv_data(). Check
> > > > > if
> > > > > it
> > > > > is
> > > > > currently in nonblock mode, and if so, ignore EWOULDBLOCK to
> > > > > continue
> > > > > sending and receiving.
> > > > > 
> > > > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > > > > ---
> > > > >   tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
> > > > >   1 file changed, 8 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/tools/testing/selftests/bpf/network_helpers.c
> > > > > b/tools/testing/selftests/bpf/network_helpers.c
> > > > > index 137cd18ef3f2..ca16ef2b648e 100644
> > > > > --- a/tools/testing/selftests/bpf/network_helpers.c
> > > > > +++ b/tools/testing/selftests/bpf/network_helpers.c
> > > > > @@ -555,6 +555,7 @@ struct send_recv_arg {
> > > > >   static void *send_recv_server(void *arg)
> > > > >   {
> > > > >   	struct send_recv_arg *a = (struct send_recv_arg
> > > > > *)arg;
> > > > > +	int flags = fcntl(a->fd, F_GETFL);
> > > > >   	ssize_t nr_sent = 0, bytes = 0;
> > > > >   	char batch[1500];
> > > > >   	int err = 0, fd;
> > > > > @@ -578,6 +579,8 @@ static void *send_recv_server(void *arg)
> > > > >   		if (nr_sent == -1 && errno == EINTR)
> > > > >   			continue;
> > > > >   		if (nr_sent == -1) {
> > > > > +			if (flags & O_NONBLOCK && errno ==
> > > > > EWOULDBLOCK)
> > > > 
> > > > I still don't see why it needs to be a non blocking IO. mptcp
> > > > should
> > > > work
> > > > with blocking IO also, no? Does it really need non blocking IO
> > > > to
> > > > make
> > > > mptcp test work? I would rather stay with blocking IO in
> > > > selftest
> > > > as
> > > > much as
> > > > possible for simplicity reason.
> > > 
> > > I need some help here.
> > > 
> > > This issue is reported by Matt in "CI: MPTCP BPF tests are now
> > > validated", and my fixes ([1] and this patch) aren't accepted by
> > > Martin. Is it normal to get EAGAINs in this case? Please give
> > > some
> > > suggestions.
> 
> Thank you for the notification, I missed your previous question.
> 
> Regarding Martin's comment about O_NONBLOCK, I understand that with
> the
> current BPF selftests -- so excluding the ones linked to the MPTCP
> schedulers -- it is not needed to support O_NONBLOCK.
> 
> It looks like it is too early to do such modifications (patch 3), and
> probably better to do MPTCP specific modifications only in our tree
> for
> the moment. When upstreaming the new MPTCP BPF selftests, it will be
> clearer for BPF maintainers what are our requirements, and such
> modifications would make more sense, no?
> 
> > It fails in mptcp_sendmsg()
> > 
> > 1898 wait_for_memory:
> > 1899                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
> > 1900                 __mptcp_push_pending(sk, msg->msg_flags);
> > 1901                 ret = sk_stream_wait_memory(sk, &timeo);
> > 1902                 if (ret)
> > 1903                         goto do_error;
> > 1904         }
> > 
> > sk_stream_wait_memory() fails in the case, and
> > tcp_rtx_and_write_queues_empty(sk) is true.
> 
> My understanding is that with the current BPF selftests -- so
> excluding
> the ones linked to the MPTCP schedulers here as well -- it looks
> unlikely to have a EAGAIN errno, because only blocking IO is being
> used,
> right?
> 
> I might be wrong, but with MPTCP schedulers, it is different: with
> non-blocking IO, EAGAIN can be seen, and this case needs to be
> handled.
> Actively waiting by retrying directly in the loop in case of EAGAIN
> is
> not recommended, but probably OK for the tests (still, might be good
> to
> add a comment to recommend polling instead). So here as well, such
> modifications can stay in our tree for the moment.
> 
> Also, what is not clear to me is where we set the socket as the
> non-blocking one. Is it only done for MPTCP servers/clients?

I didn't set the socket as non-block in the end.

EAGAIN is returned whether or not the non-blocking flag is set.

In old version like "[mptcp-next,v7,0/5] setsockopt per subflow: BPF":

https://patchwork.kernel.org/project/mptcp/cover/cover.1712571740.git.tanggeliang@kylinos.cn/

"set_nonblock" is added in patch 2, and invoked in run_subflow() in
patch 3.

Thanks,
-Geliang

> 
> > I added a issue #487 to trace this issue.
> 
> Thanks!
> 
> Cheers,
> Matt
Matthieu Baerts (NGI0) April 22, 2024, 10:31 a.m. UTC | #6
On 22/04/2024 12:04, Geliang Tang wrote:
> On Mon, 2024-04-22 at 11:45 +0200, Matthieu Baerts wrote:

(...)

>> I might be wrong, but with MPTCP schedulers, it is different: with
>> non-blocking IO, EAGAIN can be seen, and this case needs to be
>> handled.
>> Actively waiting by retrying directly in the loop in case of EAGAIN
>> is
>> not recommended, but probably OK for the tests (still, might be good
>> to
>> add a comment to recommend polling instead). So here as well, such
>> modifications can stay in our tree for the moment.
>>
>> Also, what is not clear to me is where we set the socket as the
>> non-blocking one. Is it only done for MPTCP servers/clients?
> 
> I didn't set the socket as non-block in the end.
> 
> EAGAIN will be got no matter non-block flag is set or not.

That's strange, no? When the userspace sends data with a blocking IO
socket, it should not get EAGAIN, no?

> In old version like "[mptcp-next,v7,0/5] setsockopt per subflow: BPF":
> 
> https://patchwork.kernel.org/project/mptcp/cover/cover.1712571740.git.tanggeliang@kylinos.cn/
> 
> "set_nonblock" is added in patch 2, and invoked in run_subflow() in
> patch 3.

I see, but the commit message of patch 2 doesn't explain why
set_nonblock() is needed for MPTCP. I see it is used in patch 3, but is
it really necessary? Maybe it was added for other reasons, that are no
longer valid today?

>>> I added a issue #487 to trace this issue.

Please also note that we haven't had this issue for a while. When we did
have it, KVM was not supported on the CI:

 $ /opt/virtme/virtme-run (...)
 Could not access KVM kernel module: No such file or directory
 qemu-system-x86_64: failed to initialize kvm: No such file or directory
 qemu-system-x86_64: falling back to tcg


Maybe we had this issue because the VM was abnormally too slow, and we
reached the 3 seconds timeout described by Martin in a previous message?
If this is due to the "abnormally slow setup", maybe we don't need to do
anything?

In other words, can you (easily) reproduce the issue on your side? With
or without KVM support?

Cheers,
Matt
Geliang Tang April 22, 2024, 10:34 a.m. UTC | #7
On Mon, 2024-04-22 at 12:31 +0200, Matthieu Baerts wrote:
> On 22/04/2024 12:04, Geliang Tang wrote:> On Mon, 2024-04-22 at 11:45
> +0200, Matthieu Baerts wrote:
> 
> (...)
> 
> > > I might be wrong, but with MPTCP schedulers, it is different:
> > > with
> > > non-blocking IO, EAGAIN can be seen, and this case needs to be
> > > handled.
> > > Actively waiting by retrying directly in the loop in case of
> > > EAGAIN
> > > is
> > > not recommended, but probably OK for the tests (still, might be
> > > good
> > > to
> > > add a comment to recommend polling instead). So here as well,
> > > such
> > > modifications can stay in our tree for the moment.
> > > 
> > > Also, what is not clear to me is where we set the socket as the
> > > non-blocking one. Is it only done for MPTCP servers/clients?
> > 
> > I didn't set the socket as non-block in the end.
> > 
> > EAGAIN will be got no matter non-block flag is set or not.
> 
> That's strange, no? When the userspace sends data with a blocking IO
> socket, it should not get EAGAIN, no?
> 
> > In old version like "[mptcp-next,v7,0/5] setsockopt per subflow:
> > BPF":
> > 
> > https://patchwork.kernel.org/project/mptcp/cover/cover.1712571740.git.tanggeliang@kylinos.cn/
> > 
> > "set_nonblock" is added in patch 2, and invoked in run_subflow() in
> > patch 3.
> 
> I see, but the commit message of patch 2 doesn't explain why
> set_nonblock() is needed for MPTCP. I see it is used in patch 3, but
> is
> it really necessary? Maybe it was added for other reasons, that are
> no
> longer valid today?
> 
> > > > I added a issue #487 to trace this issue.
> 
> Please also note that we didn't have this issue for a while. When we
> had
> it, it is when KVM was not supported on the CI:
> 
>  $ /opt/virtme/virtme-run (...)
>  Could not access KVM kernel module: No such file or directory
>  qemu-system-x86_64: failed to initialize kvm: No such file or
> directory
>  qemu-system-x86_64: falling back to tcg
> 
> 
> Maybe we had this issue because the VM was abnormally too slow, and
> we
> reached the 3 seconds timeout described by Martin in a previous
> message?
> If this is due to the "abnormally slow setup", maybe we don't need to
> do
> anything?
> 
> In other words, can you (easily) reproduce the issue on your side?
> With
> or without KVM support?

It's easy to reproduce by increasing total_bytes:

+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -46,7 +46,7 @@
 #endif
 #define MPTCP_SCHED_NAME_MAX   16
 
-static const unsigned int total_bytes = 10 * 1024 * 1024;
+static const unsigned int total_bytes = 200 * 1024 * 1024;

Geliang

> 
> Cheers,
> Matt
Matthieu Baerts (NGI0) April 22, 2024, 10:39 a.m. UTC | #8
On 22/04/2024 12:34, Geliang Tang wrote:
> On Mon, 2024-04-22 at 12:31 +0200, Matthieu Baerts wrote:
>> On 22/04/2024 12:04, Geliang Tang wrote:> On Mon, 2024-04-22 at 11:45
>> +0200, Matthieu Baerts wrote:
>>
>> (...)
>>
>>>> I might be wrong, but with MPTCP schedulers, it is different:
>>>> with
>>>> non-blocking IO, EAGAIN can be seen, and this case needs to be
>>>> handled.
>>>> Actively waiting by retrying directly in the loop in case of
>>>> EAGAIN
>>>> is
>>>> not recommended, but probably OK for the tests (still, might be
>>>> good
>>>> to
>>>> add a comment to recommend polling instead). So here as well,
>>>> such
>>>> modifications can stay in our tree for the moment.
>>>>
>>>> Also, what is not clear to me is where we set the socket as the
>>>> non-blocking one. Is it only done for MPTCP servers/clients?
>>>
>>> I didn't set the socket as non-block in the end.
>>>
>>> EAGAIN will be got no matter non-block flag is set or not.
>>
>> That's strange, no? When the userspace sends data with a blocking IO
>> socket, it should not get EAGAIN, no?
>>
>>> In old version like "[mptcp-next,v7,0/5] setsockopt per subflow:
>>> BPF":
>>>
>>> https://patchwork.kernel.org/project/mptcp/cover/cover.1712571740.git.tanggeliang@kylinos.cn/
>>>
>>> "set_nonblock" is added in patch 2, and invoked in run_subflow() in
>>> patch 3.
>>
>> I see, but the commit message of patch 2 doesn't explain why
>> set_nonblock() is needed for MPTCP. I see it is used in patch 3, but
>> is
>> it really necessary? Maybe it was added for other reasons, that are
>> no
>> longer valid today?
>>
>>>>> I added a issue #487 to trace this issue.
>>
>> Please also note that we didn't have this issue for a while. When we
>> had
>> it, it is when KVM was not supported on the CI:
>>
>>  $ /opt/virtme/virtme-run (...)
>>  Could not access KVM kernel module: No such file or directory
>>  qemu-system-x86_64: failed to initialize kvm: No such file or
>> directory
>>  qemu-system-x86_64: falling back to tcg
>>
>>
>> Maybe we had this issue because the VM was abnormally too slow, and
>> we
>> reached the 3 seconds timeout described by Martin in a previous
>> message?
>> If this is due to the "abnormally slow setup", maybe we don't need to
>> do
>> anything?
>>
>> In other words, can you (easily) reproduce the issue on your side?
>> With
>> or without KVM support?
> 
> It's easy to reproduce by increasing total_bytes:
Even without "set_nonblock()"? With what is in our tree, we don't do that.

As I mentioned above, if 'send()' stops before the end with -1 and errno
set to EAGAIN while the socket is supposed to wait for the transfer to
be over (blocking IO), I guess there is a bug somewhere, but not in the
tests, no?

Cheers,
Matt
Geliang Tang April 23, 2024, 2:58 a.m. UTC | #9
On Mon, 2024-04-22 at 12:39 +0200, Matthieu Baerts wrote:
> On 22/04/2024 12:34, Geliang Tang wrote:
> > On Mon, 2024-04-22 at 12:31 +0200, Matthieu Baerts wrote:
> > > On 22/04/2024 12:04, Geliang Tang wrote:> On Mon, 2024-04-22 at
> > > 11:45
> > > +0200, Matthieu Baerts wrote:
> > > 
> > > (...)
> > > 
> > > > > I might be wrong, but with MPTCP schedulers, it is different:
> > > > > with
> > > > > non-blocking IO, EAGAIN can be seen, and this case needs to
> > > > > be
> > > > > handled.
> > > > > Actively waiting by retrying directly in the loop in case of
> > > > > EAGAIN
> > > > > is
> > > > > not recommended, but probably OK for the tests (still, might
> > > > > be
> > > > > good
> > > > > to
> > > > > add a comment to recommend polling instead). So here as well,
> > > > > such
> > > > > modifications can stay in our tree for the moment.
> > > > > 
> > > > > Also, what is not clear to me is where we set the socket as
> > > > > the
> > > > > non-blocking one. Is it only done for MPTCP servers/clients?
> > > > 
> > > > I didn't set the socket as non-block in the end.
> > > > 
> > > > EAGAIN will be got no matter non-block flag is set or not.
> > > 
> > > That's strange, no? When the userspace sends data with a blocking
> > > IO
> > > socket, it should not get EAGAIN, no?
> > > 
> > > > In old version like "[mptcp-next,v7,0/5] setsockopt per
> > > > subflow:
> > > > BPF":
> > > > 
> > > > https://patchwork.kernel.org/project/mptcp/cover/cover.1712571740.git.tanggeliang@kylinos.cn/
> > > > 
> > > > "set_nonblock" is added in patch 2, and invoked in
> > > > run_subflow() in
> > > > patch 3.
> > > 
> > > I see, but the commit message of patch 2 doesn't explain why
> > > set_nonblock() is needed for MPTCP. I see it is used in patch 3,
> > > but
> > > is
> > > it really necessary? Maybe it was added for other reasons, that
> > > are
> > > no
> > > longer valid today?
> > > 
> > > > > > I added a issue #487 to trace this issue.
> > > 
> > > Please also note that we didn't have this issue for a while. When
> > > we
> > > had
> > > it, it is when KVM was not supported on the CI:
> > > 
> > >  $ /opt/virtme/virtme-run (...)
> > >  Could not access KVM kernel module: No such file or directory
> > >  qemu-system-x86_64: failed to initialize kvm: No such file or
> > > directory
> > >  qemu-system-x86_64: falling back to tcg
> > > 
> > > 
> > > Maybe we had this issue because the VM was abnormally too slow,
> > > and
> > > we
> > > reached the 3 seconds timeout described by Martin in a previous
> > > message?
> > > If this is due to the "abnormally slow setup", maybe we don't
> > > need to
> > > do
> > > anything?
> > > 
> > > In other words, can you (easily) reproduce the issue on your
> > > side?
> > > With
> > > or without KVM support?
> > 
> > It's easy to reproduce by increasing total_bytes:
> Even without "set_nonblock()"? With what is in our tree, we don't do
> that.

Yes, without "set_nonblock". It can be reproduced in our tree without
making any modifications. Increasing total_bytes makes it easier to
reproduce.

> 
> As I mentioned above, if 'send()' stops before the end with -1 and
> errno
> set to EAGAIN while the socket is supposed to wait for the transfer
> to
> be over (blocking IO), I guess there is a bug somewhere, but not in
> the
> tests, no?

Yes, I think it's a bug in kernel space in wait_for_memory in
mptcp_sendmsg().

Thanks,
-Geliang 

> 
> Cheers,
> Matt
Geliang Tang May 7, 2024, 4:04 a.m. UTC | #10
On Wed, 2024-04-10 at 14:34 -0700, Martin KaFai Lau wrote:
> On 4/9/24 11:13 PM, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > Some tests, such as the MPTCP bpf tests, require send_recv_data
> > helper
> > to run in nonblock mode.
> > 
> > This patch adds nonblock support for send_recv_data(). Check if it
> > is
> > currently in nonblock mode, and if so, ignore EWOULDBLOCK to
> > continue
> > sending and receiving.
> > 
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >   tools/testing/selftests/bpf/network_helpers.c | 9 ++++++++-
> >   1 file changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/testing/selftests/bpf/network_helpers.c
> > b/tools/testing/selftests/bpf/network_helpers.c
> > index 137cd18ef3f2..ca16ef2b648e 100644
> > --- a/tools/testing/selftests/bpf/network_helpers.c
> > +++ b/tools/testing/selftests/bpf/network_helpers.c
> > @@ -555,6 +555,7 @@ struct send_recv_arg {
> >   static void *send_recv_server(void *arg)
> >   {
> >   	struct send_recv_arg *a = (struct send_recv_arg *)arg;
> > +	int flags = fcntl(a->fd, F_GETFL);
> >   	ssize_t nr_sent = 0, bytes = 0;
> >   	char batch[1500];
> >   	int err = 0, fd;
> > @@ -578,6 +579,8 @@ static void *send_recv_server(void *arg)
> >   		if (nr_sent == -1 && errno == EINTR)
> >   			continue;
> >   		if (nr_sent == -1) {
> > +			if (flags & O_NONBLOCK && errno ==
> > EWOULDBLOCK)
> 
> I still don't see why it needs to be a non blocking IO. mptcp should
> work
> with blocking IO also, no? Does it really need non blocking IO to
> make
> mptcp test work? I would rather stay with blocking IO in selftest as
> much as
> possible for simplicity reason.
> 
> I am afraid the root cause of the EAGAIN thread has not been figured
> out yet:
> https://lore.kernel.org/all/b3943f9a8bf595212b00e96ba850bf32893312cc.camel@kernel.org/
> 
> Lets drop patch 3 until it is understood why mptcp needs EAGAIN or
> non-blocking IO.
> It feels like there is some flakiness and it should be understood and
> avoided.

Hi Martin,

I finally found the root cause of this issue. It is indeed an MPTCP
bug. It took me a long time to debug, and the fix is here:

https://patchwork.kernel.org/project/mptcp/patch/0ccc1c26d27d6ee7be22806a97983d37c6ca548c.1715053270.git.tanggeliang@kylinos.cn/

Thank you for insisting on not accepting these user-space workaround
patches from me; they would have almost hidden a kernel bug.

-Geliang

> 
> Other than the comment in patch 2, the first two patches lgtm. Please
> respin with
> the first two patches.
> 
> > +				continue;
> >   			err = -errno;
> >   			break;
> >   		}
> > @@ -599,6 +602,7 @@ static void *send_recv_server(void *arg)
> >   
> >   int send_recv_data(int lfd, int fd, uint32_t total_bytes)
> >   {
> > +	int flags = fcntl(lfd, F_GETFL);
> >   	ssize_t nr_recv = 0, bytes = 0;
> >   	struct send_recv_arg arg = {
> >   		.fd	= lfd,
> > @@ -622,8 +626,11 @@ int send_recv_data(int lfd, int fd, uint32_t
> > total_bytes)
> >   			       MIN(total_bytes - bytes,
> > sizeof(batch)), 0);
> >   		if (nr_recv == -1 && errno == EINTR)
> >   			continue;
> > -		if (nr_recv == -1)
> > +		if (nr_recv == -1) {
> > +			if (flags & O_NONBLOCK && errno ==
> > EWOULDBLOCK)
> > +				continue;
> >   			break;
> > +		}
> >   		bytes += nr_recv;
> >   	}
> >   
> 
>
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 137cd18ef3f2..ca16ef2b648e 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -555,6 +555,7 @@  struct send_recv_arg {
 static void *send_recv_server(void *arg)
 {
 	struct send_recv_arg *a = (struct send_recv_arg *)arg;
+	int flags = fcntl(a->fd, F_GETFL);
 	ssize_t nr_sent = 0, bytes = 0;
 	char batch[1500];
 	int err = 0, fd;
@@ -578,6 +579,8 @@  static void *send_recv_server(void *arg)
 		if (nr_sent == -1 && errno == EINTR)
 			continue;
 		if (nr_sent == -1) {
+			if (flags & O_NONBLOCK && errno == EWOULDBLOCK)
+				continue;
 			err = -errno;
 			break;
 		}
@@ -599,6 +602,7 @@  static void *send_recv_server(void *arg)
 
 int send_recv_data(int lfd, int fd, uint32_t total_bytes)
 {
+	int flags = fcntl(lfd, F_GETFL);
 	ssize_t nr_recv = 0, bytes = 0;
 	struct send_recv_arg arg = {
 		.fd	= lfd,
@@ -622,8 +626,11 @@  int send_recv_data(int lfd, int fd, uint32_t total_bytes)
 			       MIN(total_bytes - bytes, sizeof(batch)), 0);
 		if (nr_recv == -1 && errno == EINTR)
 			continue;
-		if (nr_recv == -1)
+		if (nr_recv == -1) {
+			if (flags & O_NONBLOCK && errno == EWOULDBLOCK)
+				continue;
 			break;
+		}
 		bytes += nr_recv;
 	}