diff mbox series

[1/1] io_uring/net: send retry for zerocopy

Message ID b876a4838597d9bba4f3215db60d72c33c448ad0.1659622472.git.asml.silence@gmail.com (mailing list archive)
State New
Headers show
Series [1/1] io_uring/net: send retry for zerocopy | expand

Commit Message

Pavel Begunkov Aug. 4, 2022, 2:15 p.m. UTC
io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
is set, however new zerocopy send is inconsistent in this regard, which
might be confusing. Handle short sends.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/net.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

Comments

Jens Axboe Aug. 4, 2022, 2:53 p.m. UTC | #1
On Thu, 4 Aug 2022 15:15:30 +0100, Pavel Begunkov wrote:
> io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
> is set, however new zerocopy send is inconsistent in this regard, which
> might be confusing. Handle short sends.
> 
> 

Applied, thanks!

[1/1] io_uring/net: send retry for zerocopy
      commit: 4a933e62083ead6cd064293a7505c56165859320

Best regards,
Stefan Metzmacher Aug. 14, 2022, 9:31 a.m. UTC | #2
Hi Pavel,

> io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
> is set, however new zerocopy send is inconsistent in this regard, which
> might be confusing. Handle short sends.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>   io_uring/net.c | 20 +++++++++++++++++---
>   1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 32fc3da04e41..f9f080b3cc1e 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -70,6 +70,7 @@ struct io_sendzc {
>   	unsigned			flags;
>   	unsigned			addr_len;
>   	void __user			*addr;
> +	size_t				done_io;
>   };
>   
>   #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
> @@ -878,6 +879,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>   
>   	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>   	zc->addr_len = READ_ONCE(sqe->addr_len);
> +	zc->done_io = 0;
>   
>   #ifdef CONFIG_COMPAT
>   	if (req->ctx->compat)
> @@ -1012,11 +1014,23 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
>   	if (unlikely(ret < min_ret)) {
>   		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
>   			return -EAGAIN;
> -		return ret == -ERESTARTSYS ? -EINTR : ret;
> +		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
> +			zc->len -= ret;
> +			zc->buf += ret;
> +			zc->done_io += ret;
> +			req->flags |= REQ_F_PARTIAL_IO;

Don't we need a prep_async function and/or something like
io_setup_async_msg() here to handle address?

metze
Jens Axboe Aug. 14, 2022, 2:05 p.m. UTC | #3
On 8/14/22 3:31 AM, Stefan Metzmacher wrote:
> Hi Pavel,
> 
>> io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
>> is set, however new zerocopy send is inconsistent in this regard, which
>> might be confusing. Handle short sends.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> ---
>>   io_uring/net.c | 20 +++++++++++++++++---
>>   1 file changed, 17 insertions(+), 3 deletions(-)
>>
>> diff --git a/io_uring/net.c b/io_uring/net.c
>> index 32fc3da04e41..f9f080b3cc1e 100644
>> --- a/io_uring/net.c
>> +++ b/io_uring/net.c
>> @@ -70,6 +70,7 @@ struct io_sendzc {
>>       unsigned            flags;
>>       unsigned            addr_len;
>>       void __user            *addr;
>> +    size_t                done_io;
>>   };
>>     #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
>> @@ -878,6 +879,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>         zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>>       zc->addr_len = READ_ONCE(sqe->addr_len);
>> +    zc->done_io = 0;
>>     #ifdef CONFIG_COMPAT
>>       if (req->ctx->compat)
>> @@ -1012,11 +1014,23 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
>>       if (unlikely(ret < min_ret)) {
>>           if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
>>               return -EAGAIN;
>> -        return ret == -ERESTARTSYS ? -EINTR : ret;
>> +        if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
>> +            zc->len -= ret;
>> +            zc->buf += ret;
>> +            zc->done_io += ret;
>> +            req->flags |= REQ_F_PARTIAL_IO;
> 
> Don't we need a prep_async function and/or something like
> io_setup_async_msg() here to handle address?

I don't think so, it's a non-vectored interface, so all the state is
already in io_sendzc.
Stefan Metzmacher Aug. 14, 2022, 2:11 p.m. UTC | #4
Hi Jens,

>>> io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
>>> is set, however new zerocopy send is inconsistent in this regard, which
>>> might be confusing. Handle short sends.
>>>
>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>> ---
>>>    io_uring/net.c | 20 +++++++++++++++++---
>>>    1 file changed, 17 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/io_uring/net.c b/io_uring/net.c
>>> index 32fc3da04e41..f9f080b3cc1e 100644
>>> --- a/io_uring/net.c
>>> +++ b/io_uring/net.c
>>> @@ -70,6 +70,7 @@ struct io_sendzc {
>>>        unsigned            flags;
>>>        unsigned            addr_len;
>>>        void __user            *addr;
>>> +    size_t                done_io;
>>>    };
>>>      #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
>>> @@ -878,6 +879,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>>          zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>>>        zc->addr_len = READ_ONCE(sqe->addr_len);
>>> +    zc->done_io = 0;
>>>      #ifdef CONFIG_COMPAT
>>>        if (req->ctx->compat)
>>> @@ -1012,11 +1014,23 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
>>>        if (unlikely(ret < min_ret)) {
>>>            if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
>>>                return -EAGAIN;
>>> -        return ret == -ERESTARTSYS ? -EINTR : ret;
>>> +        if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
>>> +            zc->len -= ret;
>>> +            zc->buf += ret;
>>> +            zc->done_io += ret;
>>> +            req->flags |= REQ_F_PARTIAL_IO;
>>
>> Don't we need a prep_async function and/or something like
>> io_setup_async_msg() here to handle address?
> 
> I don't think so, it's a non-vectored interface, so all the state is
> already in io_sendzc.

This has support for sockaddr address compared to io_send(),
if the caller needs to keep io_sendzc->addr valid until the CQE arrives,
then we need to clearly document that, as that doesn't match the common practice
of other opcodes. Currently everything but data buffers can go after the sqe is
submitted.

metze
Jens Axboe Aug. 14, 2022, 2:13 p.m. UTC | #5
On 8/14/22 8:11 AM, Stefan Metzmacher wrote:
> Hi Jens,
> 
>>>> io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
>>>> is set, however new zerocopy send is inconsistent in this regard, which
>>>> might be confusing. Handle short sends.
>>>>
>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>> ---
>>>>    io_uring/net.c | 20 +++++++++++++++++---
>>>>    1 file changed, 17 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/io_uring/net.c b/io_uring/net.c
>>>> index 32fc3da04e41..f9f080b3cc1e 100644
>>>> --- a/io_uring/net.c
>>>> +++ b/io_uring/net.c
>>>> @@ -70,6 +70,7 @@ struct io_sendzc {
>>>>        unsigned            flags;
>>>>        unsigned            addr_len;
>>>>        void __user            *addr;
>>>> +    size_t                done_io;
>>>>    };
>>>>      #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
>>>> @@ -878,6 +879,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>>>          zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>>>>        zc->addr_len = READ_ONCE(sqe->addr_len);
>>>> +    zc->done_io = 0;
>>>>      #ifdef CONFIG_COMPAT
>>>>        if (req->ctx->compat)
>>>> @@ -1012,11 +1014,23 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
>>>>        if (unlikely(ret < min_ret)) {
>>>>            if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
>>>>                return -EAGAIN;
>>>> -        return ret == -ERESTARTSYS ? -EINTR : ret;
>>>> +        if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
>>>> +            zc->len -= ret;
>>>> +            zc->buf += ret;
>>>> +            zc->done_io += ret;
>>>> +            req->flags |= REQ_F_PARTIAL_IO;
>>>
>>> Don't we need a prep_async function and/or something like
>>> io_setup_async_msg() here to handle address?
>>
>> I don't think so, it's a non-vectored interface, so all the state is
>> already in io_sendzc.
> 
> This has support for sockaddr address compared to io_send(),
> if the caller needs to keep io_sendzc->addr valid until the CQE arrives,
> then we need to clearly document that, as that doesn't match the common practice
> of other opcodes. Currently everything but data buffers can go after the sqe is
> submitted.

Good point, it's not just the 'from' address. Pavel?
Stefan Metzmacher Aug. 14, 2022, 2:48 p.m. UTC | #6
Am 14.08.22 um 16:13 schrieb Jens Axboe:
> On 8/14/22 8:11 AM, Stefan Metzmacher wrote:
>> Hi Jens,
>>
>>>>> io_uring handles short sends/recvs for stream sockets when MSG_WAITALL
>>>>> is set, however new zerocopy send is inconsistent in this regard, which
>>>>> might be confusing. Handle short sends.
>>>>>
>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>> ---
>>>>>     io_uring/net.c | 20 +++++++++++++++++---
>>>>>     1 file changed, 17 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/io_uring/net.c b/io_uring/net.c
>>>>> index 32fc3da04e41..f9f080b3cc1e 100644
>>>>> --- a/io_uring/net.c
>>>>> +++ b/io_uring/net.c
>>>>> @@ -70,6 +70,7 @@ struct io_sendzc {
>>>>>         unsigned            flags;
>>>>>         unsigned            addr_len;
>>>>>         void __user            *addr;
>>>>> +    size_t                done_io;
>>>>>     };
>>>>>       #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
>>>>> @@ -878,6 +879,7 @@ int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>>>>           zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
>>>>>         zc->addr_len = READ_ONCE(sqe->addr_len);
>>>>> +    zc->done_io = 0;
>>>>>       #ifdef CONFIG_COMPAT
>>>>>         if (req->ctx->compat)
>>>>> @@ -1012,11 +1014,23 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
>>>>>         if (unlikely(ret < min_ret)) {
>>>>>             if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
>>>>>                 return -EAGAIN;
>>>>> -        return ret == -ERESTARTSYS ? -EINTR : ret;
>>>>> +        if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
>>>>> +            zc->len -= ret;
>>>>> +            zc->buf += ret;
>>>>> +            zc->done_io += ret;
>>>>> +            req->flags |= REQ_F_PARTIAL_IO;
>>>>
>>>> Don't we need a prep_async function and/or something like
>>>> io_setup_async_msg() here to handle address?
>>>
>>> I don't think so, it's a non-vectored interface, so all the state is
>>> already in io_sendzc.
>>
>> This has support for sockaddr address compared to io_send(),
>> if the caller needs to keep io_sendzc->addr valid until the CQE arrives,
>> then we need to clearly document that, as that doesn't match the common practice
>> of other opcodes. Currently everything but data buffers can go after the sqe is
>> submitted.
> 
> Good point, it's not just the 'from' address. Pavel?

It's basically dest_addr from:

        ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
                       const struct sockaddr *dest_addr, socklen_t addrlen);

It's not used in most cases, but for non-connected udp sockets you need it.

Maybe the fixed io_op_def.async_size could be changed to something that only
allocates the async data if needed. Maybe the prep_async() hook could do the allocation
itself if needed.

metze
Pavel Begunkov Aug. 15, 2022, 9:14 a.m. UTC | #7
On 8/14/22 15:48, Stefan Metzmacher wrote:
> Am 14.08.22 um 16:13 schrieb Jens Axboe:
>> On 8/14/22 8:11 AM, Stefan Metzmacher wrote:
>>>>> Don't we need a prep_async function and/or something like
>>>>> io_setup_async_msg() here to handle address?

>>> This has support for sockaddr address compared to io_send(),
>>> if the caller needs to keep io_sendzc->addr valid until the CQE arrives,
>>> then we need to clearly document that, as that doesn't match the common practice
>>> of other opcodes. Currently everything but data buffers can go after the sqe is
>>> submitted.

Yes, can be this way if we agree it's preferable.

>> Good point, it's not just the 'from' address. Pavel?

It is

> It's basically dest_addr from:
> 
>         ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
>                        const struct sockaddr *dest_addr, socklen_t addrlen);
> 
> It's not used in most cases, but for non-connected udp sockets you need it.
> 
> Maybe the fixed io_op_def.async_size could be changed to something that only
> allocates the async data if needed. Maybe the prep_async() hook could do the allocation
> itself if needed.

It's details, easy to implement, e.g. we can just decouple the
allocation from async preparation. I'll give it a try after sending
other small zc API changes
diff mbox series

Patch

diff --git a/io_uring/net.c b/io_uring/net.c
index 32fc3da04e41..f9f080b3cc1e 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -70,6 +70,7 @@  struct io_sendzc {
 	unsigned			flags;
 	unsigned			addr_len;
 	void __user			*addr;
+	size_t				done_io;
 };
 
 #define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
@@ -878,6 +879,7 @@  int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	zc->addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
 	zc->addr_len = READ_ONCE(sqe->addr_len);
+	zc->done_io = 0;
 
 #ifdef CONFIG_COMPAT
 	if (req->ctx->compat)
@@ -1012,11 +1014,23 @@  int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
 	if (unlikely(ret < min_ret)) {
 		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
 			return -EAGAIN;
-		return ret == -ERESTARTSYS ? -EINTR : ret;
+		if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
+			zc->len -= ret;
+			zc->buf += ret;
+			zc->done_io += ret;
+			req->flags |= REQ_F_PARTIAL_IO;
+			return -EAGAIN;
+		}
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
+	} else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) {
+		io_notif_slot_flush_submit(notif_slot, 0);
 	}
 
-	if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH)
-		io_notif_slot_flush_submit(notif_slot, 0);
+	if (ret >= 0)
+		ret += zc->done_io;
+	else if (zc->done_io)
+		ret = zc->done_io;
 	io_req_set_res(req, ret, 0);
 	return IOU_OK;
 }