Message ID | 064c8731-e7f9-c415-5d4d-141a559e2017@kernel.dk (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: pass back data left in socket after receive | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply, async |
On Wed, 27 Apr 2022 13:49:34 -0600 Jens Axboe wrote: > This is currently done for CMSG_INQ, add an ability to do so via struct > msghdr as well and have CMSG_INQ use that too. If the caller sets > msghdr->msg_get_inq, then we'll pass back the hint in msghdr->msg_inq. > > Rearrange struct msghdr a bit so we can add this member while shrinking > it at the same time. On a 64-bit build, it was 96 bytes before this > change and 88 bytes afterwards. > > Signed-off-by: Jens Axboe <axboe@kernel.dk> LGTM, but I said that once before.. Eric? FWIW the io_uring patch that uses it is here: https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-flags2 > diff --git a/include/linux/socket.h b/include/linux/socket.h > index 6f85f5d957ef..12085c9a8544 100644 > --- a/include/linux/socket.h > +++ b/include/linux/socket.h > @@ -50,6 +50,9 @@ struct linger { > struct msghdr { > void *msg_name; /* ptr to socket address structure */ > int msg_namelen; /* size of socket address structure */ > + > + int msg_inq; /* output, data left in socket */ > + > struct iov_iter msg_iter; /* data */ > > /* > @@ -62,8 +65,9 @@ struct msghdr { > void __user *msg_control_user; > }; > bool msg_control_is_user : 1; > - __kernel_size_t msg_controllen; /* ancillary data buffer length */ > + bool msg_get_inq : 1;/* return INQ after receive */ > unsigned int msg_flags; /* flags on received message */ > + __kernel_size_t msg_controllen; /* ancillary data buffer length */ > struct kiocb *msg_iocb; /* ptr to iocb for async requests */ > }; > > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index cf18fbcbf123..78d79e26fb4d 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, > if (sk->sk_state == TCP_LISTEN) > goto out; > > - if (tp->recvmsg_inq) > + if (tp->recvmsg_inq) { > *cmsg_flags = TCP_CMSG_INQ; > + msg->msg_get_inq = 1; > + } > timeo = sock_rcvtimeo(sk, nonblock); > > /* Urgent data needs to be handled specially. */ > @@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, > int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, > int flags, int *addr_len) > { > - int cmsg_flags = 0, ret, inq; > + int cmsg_flags = 0, ret; > struct scm_timestamping_internal tss; > > if (unlikely(flags & MSG_ERRQUEUE)) > @@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, > release_sock(sk); > sk_defer_free_flush(sk); > > - if (cmsg_flags && ret >= 0) { > + if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { > if (cmsg_flags & TCP_CMSG_TS) > tcp_recv_timestamp(msg, sk, &tss); > + if (msg->msg_get_inq) > + msg->msg_inq = tcp_inq_hint(sk); > if (cmsg_flags & TCP_CMSG_INQ) { > - inq = tcp_inq_hint(sk); > - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); > + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(msg->msg_inq), > + &msg->msg_inq); > } > } > return ret; >
On Thu, Apr 28, 2022 at 3:18 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Wed, 27 Apr 2022 13:49:34 -0600 Jens Axboe wrote: > > This is currently done for CMSG_INQ, add an ability to do so via struct > > msghdr as well and have CMSG_INQ use that too. If the caller sets > > msghdr->msg_get_inq, then we'll pass back the hint in msghdr->msg_inq. > > > > Rearrange struct msghdr a bit so we can add this member while shrinking > > it at the same time. On a 64-bit build, it was 96 bytes before this > > change and 88 bytes afterwards. > > > > Signed-off-by: Jens Axboe <axboe@kernel.dk> > > LGTM, but I said that once before.. Eric? For some reason I have not received this patch to my primary email address. This looks good, but needs to be rebased for net-next, see below ? Also, maybe the title should reflect that it is really a TCP patch. > > FWIW the io_uring patch that uses it is here: > https://git.kernel.dk/cgit/linux-block/commit/?h=io_uring-flags2 > > > diff --git a/include/linux/socket.h b/include/linux/socket.h > > index 6f85f5d957ef..12085c9a8544 100644 > > --- a/include/linux/socket.h > > +++ b/include/linux/socket.h > > @@ -50,6 +50,9 @@ struct linger { > > struct msghdr { > > void *msg_name; /* ptr to socket address structure */ > > int msg_namelen; /* size of socket address structure */ > > + > > + int msg_inq; /* output, data left in socket */ > > + > > struct iov_iter msg_iter; /* data */ > > > > /* > > @@ -62,8 +65,9 @@ struct msghdr { > > void __user *msg_control_user; > > }; > > bool msg_control_is_user : 1; > > - __kernel_size_t msg_controllen; /* ancillary data buffer length */ > > + bool msg_get_inq : 1;/* return INQ after receive */ > > unsigned int msg_flags; /* flags on received message */ > > + __kernel_size_t msg_controllen; /* ancillary data buffer length */ > > struct kiocb *msg_iocb; /* ptr to iocb for async requests */ > > }; > > > > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > > index cf18fbcbf123..78d79e26fb4d 100644 > > --- a/net/ipv4/tcp.c > > +++ b/net/ipv4/tcp.c > > @@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, > > if (sk->sk_state == TCP_LISTEN) > > goto out; > > > > - if (tp->recvmsg_inq) > > + if (tp->recvmsg_inq) { > > *cmsg_flags = TCP_CMSG_INQ; > > + msg->msg_get_inq = 1; > > + } > > timeo = sock_rcvtimeo(sk, nonblock); > > > > /* Urgent data needs to be handled specially. */ > > @@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, > > int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, > > int flags, int *addr_len) > > { > > - int cmsg_flags = 0, ret, inq; > > + int cmsg_flags = 0, ret; > > struct scm_timestamping_internal tss; > > > > if (unlikely(flags & MSG_ERRQUEUE)) > > @@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, > > release_sock(sk); > > sk_defer_free_flush(sk); sk_defer_free_flush() has been removed in net-next. > > > > - if (cmsg_flags && ret >= 0) { > > + if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { > > if (cmsg_flags & TCP_CMSG_TS) > > tcp_recv_timestamp(msg, sk, &tss); > > + if (msg->msg_get_inq) > > + msg->msg_inq = tcp_inq_hint(sk); > > if (cmsg_flags & TCP_CMSG_INQ) { > > - inq = tcp_inq_hint(sk); > > - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); > > + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(msg->msg_inq), > > + &msg->msg_inq); > > } > > } > > return ret; > > >
On 4/28/22 4:37 PM, Eric Dumazet wrote: > On Thu, Apr 28, 2022 at 3:18 PM Jakub Kicinski <kuba@kernel.org> wrote: >> >> On Wed, 27 Apr 2022 13:49:34 -0600 Jens Axboe wrote: >>> This is currently done for CMSG_INQ, add an ability to do so via struct >>> msghdr as well and have CMSG_INQ use that too. If the caller sets >>> msghdr->msg_get_inq, then we'll pass back the hint in msghdr->msg_inq. >>> >>> Rearrange struct msghdr a bit so we can add this member while shrinking >>> it at the same time. On a 64-bit build, it was 96 bytes before this >>> change and 88 bytes afterwards. >>> >>> Signed-off-by: Jens Axboe <axboe@kernel.dk> >> >> LGTM, but I said that once before.. Eric? > > For some reason I have not received this patch to my primary email address. Huh weird. In general, email is a giant clown town these days, unfortunately... > This looks good, but needs to be rebased for net-next, see below ? Thanks - yes Jakub also mentioned that to me. > Also, maybe the title should reflect that it is really a TCP patch. I'll send out a v2 against net-next and update the commit as well.
diff --git a/include/linux/socket.h b/include/linux/socket.h index 6f85f5d957ef..12085c9a8544 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -50,6 +50,9 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ + + int msg_inq; /* output, data left in socket */ + struct iov_iter msg_iter; /* data */ /* @@ -62,8 +65,9 @@ struct msghdr { void __user *msg_control_user; }; bool msg_control_is_user : 1; - __kernel_size_t msg_controllen; /* ancillary data buffer length */ + bool msg_get_inq : 1;/* return INQ after receive */ unsigned int msg_flags; /* flags on received message */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf18fbcbf123..78d79e26fb4d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2335,8 +2335,10 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (sk->sk_state == TCP_LISTEN) goto out; - if (tp->recvmsg_inq) + if (tp->recvmsg_inq) { *cmsg_flags = TCP_CMSG_INQ; + msg->msg_get_inq = 1; + } timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2559,7 +2561,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { - int cmsg_flags = 0, ret, inq; + int cmsg_flags = 0, ret; struct scm_timestamping_internal tss; if (unlikely(flags & MSG_ERRQUEUE)) @@ -2576,12 +2578,14 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, release_sock(sk); sk_defer_free_flush(sk); - if (cmsg_flags && ret >= 0) { + if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); + if (msg->msg_get_inq) + msg->msg_inq = tcp_inq_hint(sk); if (cmsg_flags & TCP_CMSG_INQ) { - inq = tcp_inq_hint(sk); - put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); + put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(msg->msg_inq), + &msg->msg_inq); } } return ret;
This is currently done for CMSG_INQ, add an ability to do so via struct msghdr as well and have CMSG_INQ use that too. If the caller sets msghdr->msg_get_inq, then we'll pass back the hint in msghdr->msg_inq. Rearrange struct msghdr a bit so we can add this member while shrinking it at the same time. On a 64-bit build, it was 96 bytes before this change and 88 bytes afterwards. Signed-off-by: Jens Axboe <axboe@kernel.dk> ---