diff mbox series

[v10,04/18] af_vsock: implement SEQPACKET receive loop

Message ID 20210520191611.1271204-1-arseny.krasnov@kaspersky.com (mailing list archive)
State New, archived
Headers show
Series virtio/vsock: introduce SOCK_SEQPACKET support | expand

Commit Message

Arseny Krasnov May 20, 2021, 7:16 p.m. UTC
Add receive loop for SEQPACKET. It looks like receive loop for
STREAM, but there are differences:
1) It doesn't call notify callbacks.
2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
   there is no sense for these values in SEQPACKET case.
3) It waits until whole record is received or error is found during
   receiving.
4) It processes and sets 'MSG_TRUNC' flag.

So to avoid extra conditions for two types of socket inside one loop, two
independent functions were created.

Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
---
 v9 -> v10:
 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.

 include/net/af_vsock.h   |  4 +++
 net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 1 deletion(-)

Comments

Stefano Garzarella May 25, 2021, 2:08 p.m. UTC | #1
On Thu, May 20, 2021 at 10:16:08PM +0300, Arseny Krasnov wrote:
>Add receive loop for SEQPACKET. It looks like receive loop for
>STREAM, but there are differences:
>1) It doesn't call notify callbacks.
>2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>   there is no sense for these values in SEQPACKET case.
>3) It waits until whole record is received or error is found during
>   receiving.
>4) It processes and sets 'MSG_TRUNC' flag.
>
>So to avoid extra conditions for two types of socket inside one loop, two
>independent functions were created.
>
>Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
>---
> v9 -> v10:
> 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.
>
> include/net/af_vsock.h   |  4 +++
> net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 75 insertions(+), 1 deletion(-)

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Stefano Garzarella June 4, 2021, 3:06 p.m. UTC | #2
On Thu, May 20, 2021 at 10:16:08PM +0300, Arseny Krasnov wrote:
>Add receive loop for SEQPACKET. It looks like receive loop for
>STREAM, but there are differences:
>1) It doesn't call notify callbacks.
>2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>   there is no sense for these values in SEQPACKET case.
>3) It waits until whole record is received or error is found during
>   receiving.
>4) It processes and sets 'MSG_TRUNC' flag.
>
>So to avoid extra conditions for two types of socket inside one loop, two
>independent functions were created.
>
>Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
>---
> v9 -> v10:
> 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.
>
> include/net/af_vsock.h   |  4 +++
> net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 75 insertions(+), 1 deletion(-)
>
>diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>index b1c717286993..5175f5a52ce1 100644
>--- a/include/net/af_vsock.h
>+++ b/include/net/af_vsock.h
>@@ -135,6 +135,10 @@ struct vsock_transport {
> 	bool (*stream_is_active)(struct vsock_sock *);
> 	bool (*stream_allow)(u32 cid, u32 port);
>
>+	/* SEQ_PACKET. */
>+	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
>+				     int flags, bool *msg_ready);
>+
> 	/* Notification. */
> 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
> 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index c4f6bfa1e381..aede474343d1 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -1974,6 +1974,73 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
> 	return err;
> }
>
>+static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
>+				     size_t len, int flags)
>+{
>+	const struct vsock_transport *transport;
>+	bool msg_ready;
>+	struct vsock_sock *vsk;
>+	ssize_t record_len;
>+	long timeout;
>+	int err = 0;
>+	DEFINE_WAIT(wait);
>+
>+	vsk = vsock_sk(sk);
>+	transport = vsk->transport;
>+
>+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>+	msg_ready = false;
>+	record_len = 0;
>+
>+	while (1) {
>+		ssize_t fragment_len;
>+
>+		if (vsock_wait_data(sk, &wait, timeout, NULL, 0) <= 0) {
>+			/* In case of any loop break(timeout, signal
>+			 * interrupt or shutdown), we report user that
>+			 * nothing was copied.
>+			 */
>+			err = 0;

Why we report that nothing was copied?

What happen to the bytes already copied in `msg`?


>+			break;
>+		}
>+
>+		fragment_len = transport->seqpacket_dequeue(vsk, msg, flags, &msg_ready);
>+
>+		if (fragment_len < 0) {
>+			err = -ENOMEM;
>+			break;
>+		}
>+
>+		record_len += fragment_len;
>+
>+		if (msg_ready)
>+			break;
>+	}
>+
>+	if (sk->sk_err)
>+		err = -sk->sk_err;
>+	else if (sk->sk_shutdown & RCV_SHUTDOWN)
>+		err = 0;
>+
>+	if (msg_ready && err == 0) {
>+		/* User sets MSG_TRUNC, so return real length of
>+		 * packet.
>+		 */
>+		if (flags & MSG_TRUNC)
>+			err = record_len;
>+		else
>+			err = len - msg_data_left(msg);
>+
>+		/* Always set MSG_TRUNC if real length of packet is
>+		 * bigger than user's buffer.
>+		 */
>+		if (record_len > len)
>+			msg->msg_flags |= MSG_TRUNC;
>+	}
>+
>+	return err;
>+}
>+
> static int
> vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> 			  int flags)
>@@ -2029,7 +2096,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> 		goto out;
> 	}
>
>-	err = __vsock_stream_recvmsg(sk, msg, len, flags);
>+	if (sk->sk_type == SOCK_STREAM)
>+		err = __vsock_stream_recvmsg(sk, msg, len, flags);
>+	else
>+		err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);
>
> out:
> 	release_sock(sk);
>-- 
>2.25.1
>
Arseny Krasnov June 4, 2021, 6 p.m. UTC | #3
On 04.06.2021 18:06, Stefano Garzarella wrote:
> On Thu, May 20, 2021 at 10:16:08PM +0300, Arseny Krasnov wrote:
>> Add receive loop for SEQPACKET. It looks like receive loop for
>> STREAM, but there are differences:
>> 1) It doesn't call notify callbacks.
>> 2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>>   there is no sense for these values in SEQPACKET case.
>> 3) It waits until whole record is received or error is found during
>>   receiving.
>> 4) It processes and sets 'MSG_TRUNC' flag.
>>
>> So to avoid extra conditions for two types of socket inside one loop, two
>> independent functions were created.
>>
>> Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
>> ---
>> v9 -> v10:
>> 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.
>>
>> include/net/af_vsock.h   |  4 +++
>> net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
>> 2 files changed, 75 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>> index b1c717286993..5175f5a52ce1 100644
>> --- a/include/net/af_vsock.h
>> +++ b/include/net/af_vsock.h
>> @@ -135,6 +135,10 @@ struct vsock_transport {
>> 	bool (*stream_is_active)(struct vsock_sock *);
>> 	bool (*stream_allow)(u32 cid, u32 port);
>>
>> +	/* SEQ_PACKET. */
>> +	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
>> +				     int flags, bool *msg_ready);
>> +
>> 	/* Notification. */
>> 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
>> 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index c4f6bfa1e381..aede474343d1 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -1974,6 +1974,73 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
>> 	return err;
>> }
>>
>> +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
>> +				     size_t len, int flags)
>> +{
>> +	const struct vsock_transport *transport;
>> +	bool msg_ready;
>> +	struct vsock_sock *vsk;
>> +	ssize_t record_len;
>> +	long timeout;
>> +	int err = 0;
>> +	DEFINE_WAIT(wait);
>> +
>> +	vsk = vsock_sk(sk);
>> +	transport = vsk->transport;
>> +
>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>> +	msg_ready = false;
>> +	record_len = 0;
>> +
>> +	while (1) {
>> +		ssize_t fragment_len;
>> +
>> +		if (vsock_wait_data(sk, &wait, timeout, NULL, 0) <= 0) {
>> +			/* In case of any loop break(timeout, signal
>> +			 * interrupt or shutdown), we report user that
>> +			 * nothing was copied.
>> +			 */
>> +			err = 0;
> Why we report that nothing was copied?
>
> What happen to the bytes already copied in `msg`?
Seems i need to return result of vsock_wait_data()...
>
>
>> +			break;
>> +		}
>> +
>> +		fragment_len = transport->seqpacket_dequeue(vsk, msg, flags, &msg_ready);
>> +
>> +		if (fragment_len < 0) {
>> +			err = -ENOMEM;
>> +			break;
>> +		}
>> +
>> +		record_len += fragment_len;
>> +
>> +		if (msg_ready)
>> +			break;
>> +	}
>> +
>> +	if (sk->sk_err)
>> +		err = -sk->sk_err;
>> +	else if (sk->sk_shutdown & RCV_SHUTDOWN)
>> +		err = 0;
>> +
>> +	if (msg_ready && err == 0) {
>> +		/* User sets MSG_TRUNC, so return real length of
>> +		 * packet.
>> +		 */
>> +		if (flags & MSG_TRUNC)
>> +			err = record_len;
>> +		else
>> +			err = len - msg_data_left(msg);
>> +
>> +		/* Always set MSG_TRUNC if real length of packet is
>> +		 * bigger than user's buffer.
>> +		 */
>> +		if (record_len > len)
>> +			msg->msg_flags |= MSG_TRUNC;
>> +	}
>> +
>> +	return err;
>> +}
>> +
>> static int
>> vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
>> 			  int flags)
>> @@ -2029,7 +2096,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
>> 		goto out;
>> 	}
>>
>> -	err = __vsock_stream_recvmsg(sk, msg, len, flags);
>> +	if (sk->sk_type == SOCK_STREAM)
>> +		err = __vsock_stream_recvmsg(sk, msg, len, flags);
>> +	else
>> +		err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);
>>
>> out:
>> 	release_sock(sk);
>> -- 
>> 2.25.1
>>
>
Stefano Garzarella June 7, 2021, 10:48 a.m. UTC | #4
On Fri, Jun 04, 2021 at 09:00:14PM +0300, Arseny Krasnov wrote:
>
>On 04.06.2021 18:06, Stefano Garzarella wrote:
>> On Thu, May 20, 2021 at 10:16:08PM +0300, Arseny Krasnov wrote:
>>> Add receive loop for SEQPACKET. It looks like receive loop for
>>> STREAM, but there are differences:
>>> 1) It doesn't call notify callbacks.
>>> 2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>>>   there is no sense for these values in SEQPACKET case.
>>> 3) It waits until whole record is received or error is found during
>>>   receiving.
>>> 4) It processes and sets 'MSG_TRUNC' flag.
>>>
>>> So to avoid extra conditions for two types of socket inside one loop, two
>>> independent functions were created.
>>>
>>> Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
>>> ---
>>> v9 -> v10:
>>> 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.
>>>
>>> include/net/af_vsock.h   |  4 +++
>>> net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
>>> 2 files changed, 75 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>> index b1c717286993..5175f5a52ce1 100644
>>> --- a/include/net/af_vsock.h
>>> +++ b/include/net/af_vsock.h
>>> @@ -135,6 +135,10 @@ struct vsock_transport {
>>> 	bool (*stream_is_active)(struct vsock_sock *);
>>> 	bool (*stream_allow)(u32 cid, u32 port);
>>>
>>> +	/* SEQ_PACKET. */
>>> +	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
>>> +				     int flags, bool *msg_ready);
>>> +
>>> 	/* Notification. */
>>> 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
>>> 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>> index c4f6bfa1e381..aede474343d1 100644
>>> --- a/net/vmw_vsock/af_vsock.c
>>> +++ b/net/vmw_vsock/af_vsock.c
>>> @@ -1974,6 +1974,73 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
>>> 	return err;
>>> }
>>>
>>> +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
>>> +				     size_t len, int flags)
>>> +{
>>> +	const struct vsock_transport *transport;
>>> +	bool msg_ready;
>>> +	struct vsock_sock *vsk;
>>> +	ssize_t record_len;
>>> +	long timeout;
>>> +	int err = 0;
>>> +	DEFINE_WAIT(wait);
>>> +
>>> +	vsk = vsock_sk(sk);
>>> +	transport = vsk->transport;
>>> +
>>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>> +	msg_ready = false;
>>> +	record_len = 0;
>>> +
>>> +	while (1) {
>>> +		ssize_t fragment_len;
>>> +
>>> +		if (vsock_wait_data(sk, &wait, timeout, NULL, 0) <= 0) {
>>> +			/* In case of any loop break(timeout, signal
>>> +			 * interrupt or shutdown), we report user that
>>> +			 * nothing was copied.
>>> +			 */
>>> +			err = 0;
>> Why we report that nothing was copied?
>>
>> What happen to the bytes already copied in `msg`?
>Seems i need to return result of vsock_wait_data()...

I'm not sure.

My biggest concern is if we reach timeout or get a signal while waiting 
for the other pieces of a message.
I believe that we should not start copying a message if we have not 
received all the fragments. Otherwise we have this problem.

When we are sure that we have all the pieces, then we should copy them 
without interrupting.

IIRC this was done in previous versions.

Stefano
Arseny Krasnov June 7, 2021, 11:29 a.m. UTC | #5
On 07.06.2021 13:48, Stefano Garzarella wrote:
> On Fri, Jun 04, 2021 at 09:00:14PM +0300, Arseny Krasnov wrote:
>> On 04.06.2021 18:06, Stefano Garzarella wrote:
>>> On Thu, May 20, 2021 at 10:16:08PM +0300, Arseny Krasnov wrote:
>>>> Add receive loop for SEQPACKET. It looks like receive loop for
>>>> STREAM, but there are differences:
>>>> 1) It doesn't call notify callbacks.
>>>> 2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>>>>   there is no sense for these values in SEQPACKET case.
>>>> 3) It waits until whole record is received or error is found during
>>>>   receiving.
>>>> 4) It processes and sets 'MSG_TRUNC' flag.
>>>>
>>>> So to avoid extra conditions for two types of socket inside one loop, two
>>>> independent functions were created.
>>>>
>>>> Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
>>>> ---
>>>> v9 -> v10:
>>>> 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.
>>>>
>>>> include/net/af_vsock.h   |  4 +++
>>>> net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
>>>> 2 files changed, 75 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>>> index b1c717286993..5175f5a52ce1 100644
>>>> --- a/include/net/af_vsock.h
>>>> +++ b/include/net/af_vsock.h
>>>> @@ -135,6 +135,10 @@ struct vsock_transport {
>>>> 	bool (*stream_is_active)(struct vsock_sock *);
>>>> 	bool (*stream_allow)(u32 cid, u32 port);
>>>>
>>>> +	/* SEQ_PACKET. */
>>>> +	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
>>>> +				     int flags, bool *msg_ready);
>>>> +
>>>> 	/* Notification. */
>>>> 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
>>>> 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
>>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>>> index c4f6bfa1e381..aede474343d1 100644
>>>> --- a/net/vmw_vsock/af_vsock.c
>>>> +++ b/net/vmw_vsock/af_vsock.c
>>>> @@ -1974,6 +1974,73 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
>>>> 	return err;
>>>> }
>>>>
>>>> +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
>>>> +				     size_t len, int flags)
>>>> +{
>>>> +	const struct vsock_transport *transport;
>>>> +	bool msg_ready;
>>>> +	struct vsock_sock *vsk;
>>>> +	ssize_t record_len;
>>>> +	long timeout;
>>>> +	int err = 0;
>>>> +	DEFINE_WAIT(wait);
>>>> +
>>>> +	vsk = vsock_sk(sk);
>>>> +	transport = vsk->transport;
>>>> +
>>>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>>> +	msg_ready = false;
>>>> +	record_len = 0;
>>>> +
>>>> +	while (1) {
>>>> +		ssize_t fragment_len;
>>>> +
>>>> +		if (vsock_wait_data(sk, &wait, timeout, NULL, 0) <= 0) {
>>>> +			/* In case of any loop break(timeout, signal
>>>> +			 * interrupt or shutdown), we report user that
>>>> +			 * nothing was copied.
>>>> +			 */
>>>> +			err = 0;
>>> Why we report that nothing was copied?
>>>
>>> What happen to the bytes already copied in `msg`?
>> Seems i need to return result of vsock_wait_data()...
> I'm not sure.
>
> My biggest concern is if we reach timeout or get a signal while waiting 
> for the other pieces of a message.
> I believe that we should not start copying a message if we have not 
> received all the fragments. Otherwise we have this problem.
>
> When we are sure that we have all the pieces, then we should copy them 
> without interrupting.
>
> IIRC this was done in previous versions.

As i remember, previous versions also returned 0, because i thought,

that for interrupted read we can copy piece of data to user's buffer,

but we must return that nothing copied or error. In this way user

won't read part of message, because syscall returned that there is

nothing to copy. So as i understand, it is not enough - user's buffer

must be touched only when whole message is copied?

>
> Stefano
>
>
Stefano Garzarella June 7, 2021, 12:28 p.m. UTC | #6
On Mon, Jun 07, 2021 at 02:29:28PM +0300, Arseny Krasnov wrote:
>
>On 07.06.2021 13:48, Stefano Garzarella wrote:
>> On Fri, Jun 04, 2021 at 09:00:14PM +0300, Arseny Krasnov wrote:
>>> On 04.06.2021 18:06, Stefano Garzarella wrote:
>>>> On Thu, May 20, 2021 at 10:16:08PM +0300, Arseny Krasnov wrote:
>>>>> Add receive loop for SEQPACKET. It looks like receive loop for
>>>>> STREAM, but there are differences:
>>>>> 1) It doesn't call notify callbacks.
>>>>> 2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
>>>>>   there is no sense for these values in SEQPACKET case.
>>>>> 3) It waits until whole record is received or error is found during
>>>>>   receiving.
>>>>> 4) It processes and sets 'MSG_TRUNC' flag.
>>>>>
>>>>> So to avoid extra conditions for two types of socket inside one loop, two
>>>>> independent functions were created.
>>>>>
>>>>> Signed-off-by: Arseny Krasnov <arseny.krasnov@kaspersky.com>
>>>>> ---
>>>>> v9 -> v10:
>>>>> 1) Use 'msg_data_left()' instead of direct access to 'msg_hdr'.
>>>>>
>>>>> include/net/af_vsock.h   |  4 +++
>>>>> net/vmw_vsock/af_vsock.c | 72 +++++++++++++++++++++++++++++++++++++++-
>>>>> 2 files changed, 75 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>>>>> index b1c717286993..5175f5a52ce1 100644
>>>>> --- a/include/net/af_vsock.h
>>>>> +++ b/include/net/af_vsock.h
>>>>> @@ -135,6 +135,10 @@ struct vsock_transport {
>>>>> 	bool (*stream_is_active)(struct vsock_sock *);
>>>>> 	bool (*stream_allow)(u32 cid, u32 port);
>>>>>
>>>>> +	/* SEQ_PACKET. */
>>>>> +	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
>>>>> +				     int flags, bool *msg_ready);
>>>>> +
>>>>> 	/* Notification. */
>>>>> 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
>>>>> 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
>>>>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>>>>> index c4f6bfa1e381..aede474343d1 100644
>>>>> --- a/net/vmw_vsock/af_vsock.c
>>>>> +++ b/net/vmw_vsock/af_vsock.c
>>>>> @@ -1974,6 +1974,73 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
>>>>> 	return err;
>>>>> }
>>>>>
>>>>> +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
>>>>> +				     size_t len, int flags)
>>>>> +{
>>>>> +	const struct vsock_transport *transport;
>>>>> +	bool msg_ready;
>>>>> +	struct vsock_sock *vsk;
>>>>> +	ssize_t record_len;
>>>>> +	long timeout;
>>>>> +	int err = 0;
>>>>> +	DEFINE_WAIT(wait);
>>>>> +
>>>>> +	vsk = vsock_sk(sk);
>>>>> +	transport = vsk->transport;
>>>>> +
>>>>> +	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
>>>>> +	msg_ready = false;
>>>>> +	record_len = 0;
>>>>> +
>>>>> +	while (1) {
>>>>> +		ssize_t fragment_len;
>>>>> +
>>>>> +		if (vsock_wait_data(sk, &wait, timeout, NULL, 0) <= 0) {
>>>>> +			/* In case of any loop break(timeout, signal
>>>>> +			 * interrupt or shutdown), we report user that
>>>>> +			 * nothing was copied.
>>>>> +			 */
>>>>> +			err = 0;
>>>> Why we report that nothing was copied?
>>>>
>>>> What happen to the bytes already copied in `msg`?
>>> Seems i need to return result of vsock_wait_data()...
>> I'm not sure.
>>
>> My biggest concern is if we reach timeout or get a signal while waiting
>> for the other pieces of a message.
>> I believe that we should not start copying a message if we have not
>> received all the fragments. Otherwise we have this problem.
>>
>> When we are sure that we have all the pieces, then we should copy them
>> without interrupting.
>>
>> IIRC this was done in previous versions.
>
>As i remember, previous versions also returned 0, because i thought,
>that for interrupted read we can copy piece of data to user's buffer,
>but we must return that nothing copied or error. In this way user

This can also be fine, but we should remove packet form the rx_queue 
only when we are sure that we delivered the entire message.

>
>won't read part of message, because syscall returned that there is
>nothing to copy. So as i understand, it is not enough - user's buffer
>must be touched only when whole message is copied?

The important thing is to not remove packets from the rx_queue unless we 
are sure that everything went well and we are returning the entire 
message to the user.

Stefano
diff mbox series

Patch

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index b1c717286993..5175f5a52ce1 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -135,6 +135,10 @@  struct vsock_transport {
 	bool (*stream_is_active)(struct vsock_sock *);
 	bool (*stream_allow)(u32 cid, u32 port);
 
+	/* SEQ_PACKET. */
+	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
+				     int flags, bool *msg_ready);
+
 	/* Notification. */
 	int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
 	int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c4f6bfa1e381..aede474343d1 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1974,6 +1974,73 @@  static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
 	return err;
 }
 
+static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
+				     size_t len, int flags)
+{
+	const struct vsock_transport *transport;
+	bool msg_ready;
+	struct vsock_sock *vsk;
+	ssize_t record_len;
+	long timeout;
+	int err = 0;
+	DEFINE_WAIT(wait);
+
+	vsk = vsock_sk(sk);
+	transport = vsk->transport;
+
+	timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	msg_ready = false;
+	record_len = 0;
+
+	while (1) {
+		ssize_t fragment_len;
+
+		if (vsock_wait_data(sk, &wait, timeout, NULL, 0) <= 0) {
+			/* In case of any loop break(timeout, signal
+			 * interrupt or shutdown), we report user that
+			 * nothing was copied.
+			 */
+			err = 0;
+			break;
+		}
+
+		fragment_len = transport->seqpacket_dequeue(vsk, msg, flags, &msg_ready);
+
+		if (fragment_len < 0) {
+			err = -ENOMEM;
+			break;
+		}
+
+		record_len += fragment_len;
+
+		if (msg_ready)
+			break;
+	}
+
+	if (sk->sk_err)
+		err = -sk->sk_err;
+	else if (sk->sk_shutdown & RCV_SHUTDOWN)
+		err = 0;
+
+	if (msg_ready && err == 0) {
+		/* User sets MSG_TRUNC, so return real length of
+		 * packet.
+		 */
+		if (flags & MSG_TRUNC)
+			err = record_len;
+		else
+			err = len - msg_data_left(msg);
+
+		/* Always set MSG_TRUNC if real length of packet is
+		 * bigger than user's buffer.
+		 */
+		if (record_len > len)
+			msg->msg_flags |= MSG_TRUNC;
+	}
+
+	return err;
+}
+
 static int
 vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 			  int flags)
@@ -2029,7 +2096,10 @@  vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		goto out;
 	}
 
-	err = __vsock_stream_recvmsg(sk, msg, len, flags);
+	if (sk->sk_type == SOCK_STREAM)
+		err = __vsock_stream_recvmsg(sk, msg, len, flags);
+	else
+		err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);
 
 out:
 	release_sock(sk);