Message ID | 1eb5eb91a3cdb5d0f03e20120b1c5c109d690fc8.1629386016.git.pabeni@redhat.com (mailing list archive) |
---|---|
State | Accepted, archived |
Commit | 2cea5d97a2ffeb2c2c929ba1b883cbdb3c5d8d62 |
Delegated to: | Mat Martineau |
Headers | show |
Series | mptcp: minor receive path optimization | expand |
On Thu, 19 Aug 2021, Paolo Abeni wrote: > Most MPTCP packets carries a single MPTCP subption: the > DSS containing the mapping for the current packet. > > Check explicitly for the above, so that is such scenario we > replace most conditional statements with a single likely() one. > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > --- > net/mptcp/options.c | 70 +++++++++++++++++++++++---------------------- > 1 file changed, 36 insertions(+), 34 deletions(-) > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > index 0d33c020062f..af22d76f1699 100644 > --- a/net/mptcp/options.c > +++ b/net/mptcp/options.c > @@ -1111,48 +1111,50 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) > return sk->sk_state != TCP_CLOSE; > > - if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && > - msk->local_key == mp_opt.rcvr_key) { > - WRITE_ONCE(msk->rcv_fastclose, true); > - mptcp_schedule_work((struct sock *)msk); > - } > + if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { This diff is a lot shorter when ignoring whitespace :) One question - is it worth optimizing for the mp_opt.suboptions = 0 case too? While our stack will send duplicate DSS mappings when the mapped data is split across multiple TCP packets, other stacks might send each mapping only once (hopefully not, since that will affect GRO/etc). I think this series is good for the export branch now, though: Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com> > + if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && > + msk->local_key == mp_opt.rcvr_key) { > + WRITE_ONCE(msk->rcv_fastclose, true); > + mptcp_schedule_work((struct sock *)msk); > + } > > - if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { > - if (!mp_opt.echo) { > - mptcp_pm_add_addr_received(msk, &mp_opt.addr); > - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); > - } else { > - mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); > - mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); > - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); > + if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { > + if (!mp_opt.echo) { > + mptcp_pm_add_addr_received(msk, &mp_opt.addr); > + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); > + } else { > + mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); > + mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); > + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); > + } > + > + if (mp_opt.addr.port) > + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); > } > > - if (mp_opt.addr.port) > - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); > - } > + if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) > + mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); > > - if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) > - mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); > + if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { > + mptcp_pm_mp_prio_received(sk, mp_opt.backup); > + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); > + } > > - if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { > - mptcp_pm_mp_prio_received(sk, mp_opt.backup); > - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); > - } > + if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { > + mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); > + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); > + } > > - if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { > - mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); > - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); > - } > + if (mp_opt.suboptions & OPTION_MPTCP_RST) { > + subflow->reset_seen = 1; > + subflow->reset_reason = mp_opt.reset_reason; > + subflow->reset_transient = mp_opt.reset_transient; > + } > > - if (mp_opt.suboptions & OPTION_MPTCP_RST) { > - subflow->reset_seen = 1; > - subflow->reset_reason = mp_opt.reset_reason; > - subflow->reset_transient = mp_opt.reset_transient; > + if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) > + return true; > } > > - if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) > - return true; > - > /* we can't wait for recvmsg() to update the ack_seq, otherwise > * monodirectional flows will stuck > */ > @@ -1179,7 +1181,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > > memset(mpext, 0, sizeof(*mpext)); > > - if (mp_opt.use_map) { > + if (likely(mp_opt.use_map)) { > if (mp_opt.mpc_map) { > /* this is an MP_CAPABLE carrying MPTCP data > * we know this map the first chunk of data > -- > 2.26.3 -- Mat Martineau Intel
On Thu, 2021-08-19 at 17:24 -0700, Mat Martineau wrote: > On Thu, 19 Aug 2021, Paolo Abeni wrote: > > > Most MPTCP packets carries a single MPTCP subption: the > > DSS containing the mapping for the current packet. > > > > Check explicitly for the above, so that is such scenario we > > replace most conditional statements with a single likely() one. > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > --- > > net/mptcp/options.c | 70 +++++++++++++++++++++++---------------------- > > 1 file changed, 36 insertions(+), 34 deletions(-) > > > > diff --git a/net/mptcp/options.c b/net/mptcp/options.c > > index 0d33c020062f..af22d76f1699 100644 > > --- a/net/mptcp/options.c > > +++ b/net/mptcp/options.c > > @@ -1111,48 +1111,50 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) > > if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) > > return sk->sk_state != TCP_CLOSE; > > > > - if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && > > - msk->local_key == mp_opt.rcvr_key) { > > - WRITE_ONCE(msk->rcv_fastclose, true); > > - mptcp_schedule_work((struct sock *)msk); > > - } > > + if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { > > This diff is a lot shorter when ignoring whitespace :) > > One question - is it worth optimizing for the mp_opt.suboptions = 0 case > too? While our stack will send duplicate DSS mappings when the mapped data > is split across multiple TCP packets, other stacks might send each mapping > only once (hopefully not, since that will affect GRO/etc). I *think* even mptcp.org is sending DSS on every packets, to help GRO/GSO. Generally speaking, with intermittent DSS, we kill both TSO and GRO. It does not looks like a fast path. I guess we could add that check with time, if we see it being useful. Thanks! Paolo
Hi Paolo, On 20/08/2021 09:58, Paolo Abeni wrote: > On Thu, 2021-08-19 at 17:24 -0700, Mat Martineau wrote: >> On Thu, 19 Aug 2021, Paolo Abeni wrote: >> >>> Most MPTCP packets carries a single MPTCP subption: the >>> DSS containing the mapping for the current packet. >>> >>> Check explicitly for the above, so that is such scenario we >>> replace most conditional statements with a single likely() one. >>> >>> Signed-off-by: Paolo Abeni <pabeni@redhat.com> >>> --- >>> net/mptcp/options.c | 70 +++++++++++++++++++++++---------------------- >>> 1 file changed, 36 insertions(+), 34 deletions(-) >>> >>> diff --git a/net/mptcp/options.c b/net/mptcp/options.c >>> index 0d33c020062f..af22d76f1699 100644 >>> --- a/net/mptcp/options.c >>> +++ b/net/mptcp/options.c >>> @@ -1111,48 +1111,50 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) >>> if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) >>> return sk->sk_state != TCP_CLOSE; >>> >>> - if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && >>> - msk->local_key == mp_opt.rcvr_key) { >>> - WRITE_ONCE(msk->rcv_fastclose, true); >>> - mptcp_schedule_work((struct sock *)msk); >>> - } >>> + if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { >> >> This diff is a lot shorter when ignoring whitespace :) >> >> One question - is it worth optimizing for the mp_opt.suboptions = 0 case >> too? While our stack will send duplicate DSS mappings when the mapped data >> is split across multiple TCP packets, other stacks might send each mapping >> only once (hopefully not, since that will affect GRO/etc). > > I *think* even mptcp.org is sending DSS on every packets, to help > GRO/GSO. Yes it does. (or at least, that's what is expected :) ) > Generally speaking, with intermittent DSS, we kill both TSO and GRO. Hopefully, NIC vendors could recognise MPTCP options and aggregate packets if they are in-order from MPTCP point of view as well. > It does not looks like a fast path. I guess we could add that check with > time, if we see it being useful. Cheers, Matt
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 0d33c020062f..af22d76f1699 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1111,48 +1111,50 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return sk->sk_state != TCP_CLOSE; - if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && - msk->local_key == mp_opt.rcvr_key) { - WRITE_ONCE(msk->rcv_fastclose, true); - mptcp_schedule_work((struct sock *)msk); - } + if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { + if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && + msk->local_key == mp_opt.rcvr_key) { + WRITE_ONCE(msk->rcv_fastclose, true); + mptcp_schedule_work((struct sock *)msk); + } - if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { - if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &mp_opt.addr); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); - } else { - mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); - mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); + if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { + if (!mp_opt.echo) { + mptcp_pm_add_addr_received(msk, &mp_opt.addr); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); + } else { + mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); + mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); + } + + if (mp_opt.addr.port) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); } - if (mp_opt.addr.port) - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); - } + if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) + mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); - if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) - mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); + if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { + mptcp_pm_mp_prio_received(sk, mp_opt.backup); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); + } - if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { - mptcp_pm_mp_prio_received(sk, mp_opt.backup); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); - } + if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { + mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); + } - if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { - mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); - } + if (mp_opt.suboptions & OPTION_MPTCP_RST) { + subflow->reset_seen = 1; + subflow->reset_reason = mp_opt.reset_reason; + subflow->reset_transient = mp_opt.reset_transient; + } - if (mp_opt.suboptions & OPTION_MPTCP_RST) { - subflow->reset_seen = 1; - subflow->reset_reason = mp_opt.reset_reason; - subflow->reset_transient = mp_opt.reset_transient; + if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) + return true; } - if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) - return true; - /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ @@ -1179,7 +1181,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) memset(mpext, 0, sizeof(*mpext)); - if (mp_opt.use_map) { + if (likely(mp_opt.use_map)) { if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data
Most MPTCP packets carries a single MPTCP subption: the DSS containing the mapping for the current packet. Check explicitly for the above, so that is such scenario we replace most conditional statements with a single likely() one. Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/mptcp/options.c | 70 +++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 34 deletions(-)