Message ID | bdbbff06f20c100c00e59932ffecbd18ad699f57.1684166247.git.asml.silence@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | minor tcp io_uring zc optimisations | expand |
On 5/15/23 10:06 AM, Pavel Begunkov wrote: > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index 40f591f7fce1..3d18e295bb2f 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -1231,7 +1231,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) > if ((flags & MSG_ZEROCOPY) && size) { > if (msg->msg_ubuf) { > uarg = msg->msg_ubuf; > - net_zcopy_get(uarg); > zc = sk->sk_route_caps & NETIF_F_SG; > } else if (sock_flag(sk, SOCK_ZEROCOPY)) { > skb = tcp_write_queue_tail(sk); > @@ -1458,7 +1457,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) > tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); > } > out_nopush: > - net_zcopy_put(uarg); > + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ > + if (uarg && !msg->msg_ubuf) > + net_zcopy_put(uarg); > return copied + copied_syn; > > do_error: > @@ -1467,7 +1468,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) > if (copied + copied_syn) > goto out; > out_err: > - net_zcopy_put_abort(uarg, true); > + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ > + if (uarg && !msg->msg_ubuf) > + net_zcopy_put_abort(uarg, true); > err = sk_stream_error(sk, flags, err); > /* make sure we wake any epoll edge trigger waiter */ > if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { Both net_zcopy_put_abort and net_zcopy_put have an `if (uarg)` check.
On Mon, May 15, 2023 at 7:29 PM David Ahern <dsahern@kernel.org> wrote: > > On 5/15/23 10:06 AM, Pavel Begunkov wrote: > > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > > index 40f591f7fce1..3d18e295bb2f 100644 > > --- a/net/ipv4/tcp.c > > +++ b/net/ipv4/tcp.c > > @@ -1231,7 +1231,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) > > if ((flags & MSG_ZEROCOPY) && size) { > > if (msg->msg_ubuf) { > > uarg = msg->msg_ubuf; > > - net_zcopy_get(uarg); > > zc = sk->sk_route_caps & NETIF_F_SG; > > } else if (sock_flag(sk, SOCK_ZEROCOPY)) { > > skb = tcp_write_queue_tail(sk); > > @@ -1458,7 +1457,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) > > tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); > > } > > out_nopush: > > - net_zcopy_put(uarg); > > + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ > > + if (uarg && !msg->msg_ubuf) > > + net_zcopy_put(uarg); > > return copied + copied_syn; > > > > do_error: > > @@ -1467,7 +1468,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) > > if (copied + copied_syn) > > goto out; > > out_err: > > - net_zcopy_put_abort(uarg, true); > > + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ > > + if (uarg && !msg->msg_ubuf) > > + net_zcopy_put_abort(uarg, true); > > err = sk_stream_error(sk, flags, err); > > /* make sure we wake any epoll edge trigger waiter */ > > if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { > > Both net_zcopy_put_abort and net_zcopy_put have an `if (uarg)` check. Right, but here this might avoid a read of msg->msg_ubuf, which might be more expensive to fetch. Compiler will probably remove the second test (uarg) from net_zcopy_put() Reviewed-by: Eric Dumazet <edumazet@google.com>
On 5/15/23 12:14 PM, Eric Dumazet wrote: > On Mon, May 15, 2023 at 7:29 PM David Ahern <dsahern@kernel.org> wrote: >> >> On 5/15/23 10:06 AM, Pavel Begunkov wrote: >>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c >>> index 40f591f7fce1..3d18e295bb2f 100644 >>> --- a/net/ipv4/tcp.c >>> +++ b/net/ipv4/tcp.c >>> @@ -1231,7 +1231,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) >>> if ((flags & MSG_ZEROCOPY) && size) { >>> if (msg->msg_ubuf) { >>> uarg = msg->msg_ubuf; >>> - net_zcopy_get(uarg); >>> zc = sk->sk_route_caps & NETIF_F_SG; >>> } else if (sock_flag(sk, SOCK_ZEROCOPY)) { >>> skb = tcp_write_queue_tail(sk); >>> @@ -1458,7 +1457,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) >>> tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); >>> } >>> out_nopush: >>> - net_zcopy_put(uarg); >>> + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ >>> + if (uarg && !msg->msg_ubuf) >>> + net_zcopy_put(uarg); >>> return copied + copied_syn; >>> >>> do_error: >>> @@ -1467,7 +1468,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) >>> if (copied + copied_syn) >>> goto out; >>> out_err: >>> - net_zcopy_put_abort(uarg, true); >>> + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ >>> + if (uarg && !msg->msg_ubuf) >>> + net_zcopy_put_abort(uarg, true); >>> err = sk_stream_error(sk, flags, err); >>> /* make sure we wake any epoll edge trigger waiter */ >>> if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { >> >> Both net_zcopy_put_abort and net_zcopy_put have an `if (uarg)` check. > > Right, but here this might avoid a read of msg->msg_ubuf, which might > be more expensive to fetch. agreed. > > Compiler will probably remove the second test (uarg) from net_zcopy_put() > > Reviewed-by: Eric Dumazet <edumazet@google.com> The one in net_zcopy_put can be removed with the above change. 
Its other caller is net_zcopy_put_abort which has already checked uarg is set.
On 5/15/23 19:40, David Ahern wrote: > On 5/15/23 12:14 PM, Eric Dumazet wrote: >> On Mon, May 15, 2023 at 7:29 PM David Ahern <dsahern@kernel.org> wrote: >>> >>> On 5/15/23 10:06 AM, Pavel Begunkov wrote: >>>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c >>>> index 40f591f7fce1..3d18e295bb2f 100644 >>>> --- a/net/ipv4/tcp.c >>>> +++ b/net/ipv4/tcp.c >>>> @@ -1231,7 +1231,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) >>>> if ((flags & MSG_ZEROCOPY) && size) { >>>> if (msg->msg_ubuf) { >>>> uarg = msg->msg_ubuf; >>>> - net_zcopy_get(uarg); >>>> zc = sk->sk_route_caps & NETIF_F_SG; >>>> } else if (sock_flag(sk, SOCK_ZEROCOPY)) { >>>> skb = tcp_write_queue_tail(sk); >>>> @@ -1458,7 +1457,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) >>>> tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); >>>> } >>>> out_nopush: >>>> - net_zcopy_put(uarg); >>>> + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ >>>> + if (uarg && !msg->msg_ubuf) >>>> + net_zcopy_put(uarg); >>>> return copied + copied_syn; >>>> >>>> do_error: >>>> @@ -1467,7 +1468,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) >>>> if (copied + copied_syn) >>>> goto out; >>>> out_err: >>>> - net_zcopy_put_abort(uarg, true); >>>> + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ >>>> + if (uarg && !msg->msg_ubuf) >>>> + net_zcopy_put_abort(uarg, true); >>>> err = sk_stream_error(sk, flags, err); >>>> /* make sure we wake any epoll edge trigger waiter */ >>>> if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { >>> >>> Both net_zcopy_put_abort and net_zcopy_put have an `if (uarg)` check. >> >> Right, but here this might avoid a read of msg->msg_ubuf, which might >> be more expensive to fetch. > > agreed. I put it there to avoid one extra check in the non-zerocopy path. msg->msg_ubuf is null there, the conditional will pass and it'll still have to test uarg. 
>> Compiler will probably remove the second test (uarg) from net_zcopy_put() >> >> Reviewed-by: Eric Dumazet <edumazet@google.com> Thank you for reviews! > The one in net_zcopy_put can be removed with the above change. It's > other caller is net_zcopy_put_abort which has already checked uarg is set. Ah yes, do you want me to fold it in?
On 5/16/23 6:59 AM, Pavel Begunkov wrote: > > >> The one in net_zcopy_put can be removed with the above change. It's >> other caller is net_zcopy_put_abort which has already checked uarg is >> set. > > Ah yes, do you want me to fold it in? > no preference.
On 5/16/23 15:37, David Ahern wrote: > On 5/16/23 6:59 AM, Pavel Begunkov wrote: >> >> >>> The one in net_zcopy_put can be removed with the above change. It's >>> other caller is net_zcopy_put_abort which has already checked uarg is >>> set. >> >> Ah yes, do you want me to fold it in? >> > > no preference. I'll leave it for another patch then. It might be interesting to try removing null checks for all *_zcopy_* helpers, but it didn't feel right last time I tried.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 40f591f7fce1..3d18e295bb2f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1231,7 +1231,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if ((flags & MSG_ZEROCOPY) && size) { if (msg->msg_ubuf) { uarg = msg->msg_ubuf; - net_zcopy_get(uarg); zc = sk->sk_route_caps & NETIF_F_SG; } else if (sock_flag(sk, SOCK_ZEROCOPY)) { skb = tcp_write_queue_tail(sk); @@ -1458,7 +1457,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: - net_zcopy_put(uarg); + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ + if (uarg && !msg->msg_ubuf) + net_zcopy_put(uarg); return copied + copied_syn; do_error: @@ -1467,7 +1468,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) if (copied + copied_syn) goto out; out_err: - net_zcopy_put_abort(uarg, true); + /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ + if (uarg && !msg->msg_ubuf) + net_zcopy_put_abort(uarg, true); err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
io_uring keeps a reference to ubuf_info during submission, so if tcp_sendmsg_locked() sees msghdr::msg_ubuf it can be sure the buffer will be kept alive and doesn't need to additionally pin it. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> --- net/ipv4/tcp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)