Message ID | 20211111151418.17276-4-fw@strlen.de (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Mat Martineau |
Headers | show |
Series | TCP_INQ support | expand |
Context | Check | Description |
---|---|---|
matttbe/build | success | Build and static analysis OK |
matttbe/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 72 lines checked |
matttbe/KVM_Validation__normal | warning | Unstable: 2 failed test(s): packetdrill_sockopts selftest_mptcp_join |
matttbe/KVM_Validation__debug | warning | Unstable: 3 failed test(s): packetdrill_sockopts selftest_diag selftest_mptcp_join |
On Thu, 11 Nov 2021, Florian Westphal wrote: > Allows to query in-sequence data ready for read(), total bytes in > write queue and total bytes in write queue that have not yet been sent. > > v2: remove unneeded READ_ONCE() (Paolo Abeni) > > Signed-off-by: Florian Westphal <fw@strlen.de> > --- > net/mptcp/protocol.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 54 insertions(+) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 61047d04e849..1c4604283b00 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -22,6 +22,7 @@ > #endif > #include <net/mptcp.h> > #include <net/xfrm.h> > +#include <asm/ioctls.h> > #include "protocol.h" > #include "mib.h" > > @@ -3260,6 +3261,58 @@ static int mptcp_forward_alloc_get(const struct sock *sk) > return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc; > } > > +static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) > +{ > + const struct sock *sk = (void *)msk; > + u64 delta; > + > + if (sk->sk_state == TCP_LISTEN) > + return -EINVAL; > + > + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) > + return 0; > + > + delta = msk->write_seq - v; > + if (delta > INT_MAX) > + delta = INT_MAX; > + > + return (int)delta; > +} > + > +static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) > +{ > + struct mptcp_sock *msk = mptcp_sk(sk); > + int answ; > + bool slow; Reverse-xmas issue here. > + > + switch (cmd) { > + case SIOCINQ: > + if (sk->sk_state == TCP_LISTEN) > + return -EINVAL; > + > + lock_sock(sk); > + if (skb_queue_empty(&msk->receive_queue)) I'm guessing that the idea here is that answering with a nonzero value is enough to get the userspace program to do another read, rather than always imposing overhead to dig through the subflow rx buffers and sk->sk_receive_queue? That does seem like a good way to go, at least with regard to calling __mptcp_move_skbs(). Would it be useful to check sk->sk_receive_queue and splice if non-empty (which is relatively cheap) to get a slightly more accurate inq number? To maybe answer this question for myself - the socket lock was just acquired so it's unlikely for skbs to have been added to sk_receive_queue, and the extra complexity may not be worth it. - Mat > + __mptcp_move_skbs(msk); > + answ = mptcp_inq_hint(sk); > + release_sock(sk); > + break; > + case SIOCOUTQ: > + slow = lock_sock_fast(sk); > + answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); > + unlock_sock_fast(sk, slow); > + break; > + case SIOCOUTQNSD: > + slow = lock_sock_fast(sk); > + answ = mptcp_ioctl_outq(msk, msk->snd_nxt); > + unlock_sock_fast(sk, slow); > + break; > + default: > + return -ENOIOCTLCMD; > + } > + > + return put_user(answ, (int __user *)arg); > +} > + > static struct proto mptcp_prot = { > .name = "MPTCP", > .owner = THIS_MODULE, > @@ -3272,6 +3325,7 @@ static struct proto mptcp_prot = { > .shutdown = mptcp_shutdown, > .destroy = mptcp_destroy, > .sendmsg = mptcp_sendmsg, > + .ioctl = mptcp_ioctl, > .recvmsg = mptcp_recvmsg, > .release_cb = mptcp_release_cb, > .hash = mptcp_hash, > -- > 2.32.0 > > > -- Mat Martineau Intel
Mat Martineau <mathew.j.martineau@linux.intel.com> wrote: > > + lock_sock(sk); > > + if (skb_queue_empty(&msk->receive_queue)) > > I'm guessing that the idea here is that answering with a nonzero value is > enough to get the userspace program to do another read, rather than always > imposing overhead to dig through the subflow rx buffers and > sk->sk_receive_queue? That does seem like a good way to go, at least with > regard to calling __mptcp_move_skbs(). > > Would it be useful to check sk->sk_receive_queue and splice if non-empty > (which is relatively cheap) to get a slightly more accurate inq number? To > maybe answer this question for myself - the socket lock was just acquired so > it's unlikely for skbs to have been added to sk_receive_queue, and the extra > complexity may not be worth it. The idea was to avoid a bogus '0' answer; msk rx queue might be empty but sk_rx q might have new data. I can make the __mptcp_move_skbs() unconditional, as you said we already acquired the msk socket lock so might as well splice pending subflows and so on.
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 61047d04e849..1c4604283b00 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -22,6 +22,7 @@ #endif #include <net/mptcp.h> #include <net/xfrm.h> +#include <asm/ioctls.h> #include "protocol.h" #include "mib.h" @@ -3260,6 +3261,58 @@ static int mptcp_forward_alloc_get(const struct sock *sk) return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc; } +static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v) +{ + const struct sock *sk = (void *)msk; + u64 delta; + + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) + return 0; + + delta = msk->write_seq - v; + if (delta > INT_MAX) + delta = INT_MAX; + + return (int)delta; +} + +static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + int answ; + bool slow; + + switch (cmd) { + case SIOCINQ: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + lock_sock(sk); + if (skb_queue_empty(&msk->receive_queue)) + __mptcp_move_skbs(msk); + answ = mptcp_inq_hint(sk); + release_sock(sk); + break; + case SIOCOUTQ: + slow = lock_sock_fast(sk); + answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); + unlock_sock_fast(sk, slow); + break; + case SIOCOUTQNSD: + slow = lock_sock_fast(sk); + answ = mptcp_ioctl_outq(msk, msk->snd_nxt); + unlock_sock_fast(sk, slow); + break; + default: + return -ENOIOCTLCMD; + } + + return put_user(answ, (int __user *)arg); +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3272,6 +3325,7 @@ static struct proto mptcp_prot = { .shutdown = mptcp_shutdown, .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, + .ioctl = mptcp_ioctl, .recvmsg = mptcp_recvmsg, .release_cb = mptcp_release_cb, .hash = mptcp_hash,
Allows to query in-sequence data ready for read(), total bytes in write queue and total bytes in write queue that have not yet been sent. v2: remove unneeded READ_ONCE() (Paolo Abeni) Signed-off-by: Florian Westphal <fw@strlen.de> --- net/mptcp/protocol.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+)