Message ID | 20220516034519.184876-9-imagedong@tencent.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: tcp: add skb drop reasons to tcp state change | expand |
On Sun, May 15, 2022 at 8:46 PM <menglong8.dong@gmail.com> wrote: > > From: Menglong Dong <imagedong@tencent.com> > > In order to get the reasons of skb drops, add a function argument of > type 'enum skb_drop_reason *reason' to tcp_timewait_state_process(). > > In the origin code, all packets to time-wait socket are treated as > dropping with kfree_skb(), which can make users confused. Therefore, > we use consume_skb() for the skbs that are 'good'. We can check the > value of 'reason' to decide use kfree_skb() or consume_skb(). > > The new reason 'TIMEWAIT' is added for the case that the skb is dropped > as the socket in time-wait state. > > Signed-off-by: Menglong Dong <imagedong@tencent.com> > --- > include/linux/skbuff.h | 5 +++++ > include/net/tcp.h | 7 ++++--- > net/ipv4/tcp_ipv4.c | 11 +++++++++-- > net/ipv4/tcp_minisocks.c | 24 ++++++++++++++++++++---- > net/ipv6/tcp_ipv6.c | 10 ++++++++-- > 5 files changed, 46 insertions(+), 11 deletions(-) > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index 4578bbab5a3e..8d18fc5a5af6 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -560,6 +560,10 @@ struct sk_buff; > * SKB_DROP_REASON_TCP_REQQFULLDROP > * request queue of the listen socket is full, corresponding to > * LINUX_MIB_TCPREQQFULLDROP > + * > + * SKB_DROP_REASON_TIMEWAIT > + * socket is in time-wait state and all packet that received will > + * be treated as 'drop', except a good 'SYN' packet > */ > #define __DEFINE_SKB_DROP_REASON(FN) \ > FN(NOT_SPECIFIED) \ > @@ -631,6 +635,7 @@ struct sk_buff; > FN(TCP_ABORTONDATA) \ > FN(LISTENOVERFLOWS) \ > FN(TCP_REQQFULLDROP) \ > + FN(TIMEWAIT) \ > FN(MAX) > > /* The reason of skb drop, which is used in kfree_skb_reason(). 
> diff --git a/include/net/tcp.h b/include/net/tcp.h > index 082dd0627e2e..88217b8d95ac 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -380,9 +380,10 @@ enum tcp_tw_status { > }; > > > -enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, > - struct sk_buff *skb, > - const struct tcphdr *th); > +enum tcp_tw_status > +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > + const struct tcphdr *th, > + enum skb_drop_reason *reason); > struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, > struct request_sock *req, bool fastopen, > bool *lost_race); > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > index 708f92b03f42..9174ee162633 100644 > --- a/net/ipv4/tcp_ipv4.c > +++ b/net/ipv4/tcp_ipv4.c > @@ -2134,7 +2134,8 @@ int tcp_v4_rcv(struct sk_buff *skb) > inet_twsk_put(inet_twsk(sk)); > goto csum_error; > } > - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { > + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, > + &drop_reason)) { > case TCP_TW_SYN: { > struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), > &tcp_hashinfo, skb, > @@ -2150,12 +2151,18 @@ int tcp_v4_rcv(struct sk_buff *skb) > refcounted = false; > goto process; > } > + /* TCP_FLAGS or NO_SOCKET? 
*/ > + SKB_DR_SET(drop_reason, TCP_FLAGS); > } > /* to ACK */ > fallthrough; > case TCP_TW_ACK: > tcp_v4_timewait_ack(sk, skb); > - break; > + refcounted = false; > + if (drop_reason) > + goto discard_it; > + else > + goto put_and_return; > case TCP_TW_RST: > tcp_v4_send_reset(sk, skb); > inet_twsk_deschedule_put(inet_twsk(sk)); > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > index 1a21018f6f64..329724118b7f 100644 > --- a/net/ipv4/tcp_minisocks.c > +++ b/net/ipv4/tcp_minisocks.c > @@ -83,13 +83,15 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, > */ > enum tcp_tw_status > tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > - const struct tcphdr *th) > + const struct tcphdr *th, > + enum skb_drop_reason *reason) > { > struct tcp_options_received tmp_opt; > struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); > bool paws_reject = false; > > tmp_opt.saw_tstamp = 0; > + *reason = SKB_DROP_REASON_NOT_SPECIFIED; > if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { > tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); > > @@ -113,11 +115,16 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > return tcp_timewait_check_oow_rate_limit( > tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); > > - if (th->rst) > + if (th->rst) { > + SKB_DR_SET(*reason, TCP_RESET); > goto kill; > + } > > - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) > + if (th->syn && !before(TCP_SKB_CB(skb)->seq, > + tcptw->tw_rcv_nxt)) { > + SKB_DR_SET(*reason, TCP_FLAGS); > return TCP_TW_RST; > + } > > /* Dup ACK? */ > if (!th->ack || > @@ -143,6 +150,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > } > > inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); > + > + /* skb should be free normally on this case. 
*/ > + *reason = SKB_NOT_DROPPED_YET; > return TCP_TW_ACK; > } > > @@ -174,6 +184,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > * protocol bug yet. > */ > if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { > + SKB_DR_SET(*reason, TCP_RESET); > kill: > inet_twsk_deschedule_put(tw); > return TCP_TW_SUCCESS; > @@ -216,11 +227,14 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > if (isn == 0) > isn++; > TCP_SKB_CB(skb)->tcp_tw_isn = isn; > + *reason = SKB_NOT_DROPPED_YET; > return TCP_TW_SYN; > } > > - if (paws_reject) > + if (paws_reject) { > + SKB_DR_SET(*reason, TCP_RFC7323_PAWS); > __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); > + } > > if (!th->rst) { > /* In this case we must reset the TIMEWAIT timer. > @@ -232,9 +246,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > if (paws_reject || th->ack) > inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); > > + SKB_DR_OR(*reason, TIMEWAIT); > return tcp_timewait_check_oow_rate_limit( > tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); > } > + SKB_DR_SET(*reason, TCP_RESET); > inet_twsk_put(tw); > return TCP_TW_SUCCESS; > } > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c > index 27c51991bd54..5c777006de3d 100644 > --- a/net/ipv6/tcp_ipv6.c > +++ b/net/ipv6/tcp_ipv6.c > @@ -1795,7 +1795,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) > goto csum_error; > } > > - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { > + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, > + &drop_reason)) { > case TCP_TW_SYN: > { > struct sock *sk2; > @@ -1815,12 +1816,17 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) > refcounted = false; > goto process; > } > + SKB_DR_SET(drop_reason, TCP_FLAGS); > } > /* to ACK */ > fallthrough; > case TCP_TW_ACK: > tcp_v6_timewait_ack(sk, skb); > - break; > + refcounted = false; > + if (drop_reason) > + goto discard_it; > + else > + goto 
put_and_return; My brain exploded. I guarantee you that whoever is going to look at this code in one year will be completely lost. Adding so much complexity is crazy. About: refcounted = false; This is not going to be used if "goto discard_it;" is taken. If the "goto put_and_return;" is taken, this boils down to: return ret ? -1 : 0; Are you sure ret is even initialized at this point? Why are we doing this? We were doing "return 0;" Also, where is skb freed? Are you calling tcp_v6_timewait_ack(sk, skb) after skb has been freed??? > case TCP_TW_RST: > tcp_v6_send_reset(sk, skb); > inet_twsk_deschedule_put(inet_twsk(sk)); > -- > 2.36.1 >
On Mon, May 16, 2022 at 12:22 PM Eric Dumazet <edumazet@google.com> wrote: > > On Sun, May 15, 2022 at 8:46 PM <menglong8.dong@gmail.com> wrote: > > > > From: Menglong Dong <imagedong@tencent.com> > > > > In order to get the reasons of skb drops, add a function argument of > > type 'enum skb_drop_reason *reason' to tcp_timewait_state_process(). > > > > In the origin code, all packets to time-wait socket are treated as > > dropping with kfree_skb(), which can make users confused. Therefore, > > we use consume_skb() for the skbs that are 'good'. We can check the > > value of 'reason' to decide use kfree_skb() or consume_skb(). > > > > The new reason 'TIMEWAIT' is added for the case that the skb is dropped > > as the socket in time-wait state. > > > > Signed-off-by: Menglong Dong <imagedong@tencent.com> > > --- > > include/linux/skbuff.h | 5 +++++ > > include/net/tcp.h | 7 ++++--- > > net/ipv4/tcp_ipv4.c | 11 +++++++++-- > > net/ipv4/tcp_minisocks.c | 24 ++++++++++++++++++++---- > > net/ipv6/tcp_ipv6.c | 10 ++++++++-- > > 5 files changed, 46 insertions(+), 11 deletions(-) > > > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > > index 4578bbab5a3e..8d18fc5a5af6 100644 > > --- a/include/linux/skbuff.h > > +++ b/include/linux/skbuff.h > > @@ -560,6 +560,10 @@ struct sk_buff; > > * SKB_DROP_REASON_TCP_REQQFULLDROP > > * request queue of the listen socket is full, corresponding to > > * LINUX_MIB_TCPREQQFULLDROP > > + * > > + * SKB_DROP_REASON_TIMEWAIT > > + * socket is in time-wait state and all packet that received will > > + * be treated as 'drop', except a good 'SYN' packet > > */ > > #define __DEFINE_SKB_DROP_REASON(FN) \ > > FN(NOT_SPECIFIED) \ > > @@ -631,6 +635,7 @@ struct sk_buff; > > FN(TCP_ABORTONDATA) \ > > FN(LISTENOVERFLOWS) \ > > FN(TCP_REQQFULLDROP) \ > > + FN(TIMEWAIT) \ > > FN(MAX) > > > > /* The reason of skb drop, which is used in kfree_skb_reason(). 
> > diff --git a/include/net/tcp.h b/include/net/tcp.h > > index 082dd0627e2e..88217b8d95ac 100644 > > --- a/include/net/tcp.h > > +++ b/include/net/tcp.h > > @@ -380,9 +380,10 @@ enum tcp_tw_status { > > }; > > > > > > -enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, > > - struct sk_buff *skb, > > - const struct tcphdr *th); > > +enum tcp_tw_status > > +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > + const struct tcphdr *th, > > + enum skb_drop_reason *reason); > > struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, > > struct request_sock *req, bool fastopen, > > bool *lost_race); > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c > > index 708f92b03f42..9174ee162633 100644 > > --- a/net/ipv4/tcp_ipv4.c > > +++ b/net/ipv4/tcp_ipv4.c > > @@ -2134,7 +2134,8 @@ int tcp_v4_rcv(struct sk_buff *skb) > > inet_twsk_put(inet_twsk(sk)); > > goto csum_error; > > } > > - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { > > + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, > > + &drop_reason)) { > > case TCP_TW_SYN: { > > struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), > > &tcp_hashinfo, skb, > > @@ -2150,12 +2151,18 @@ int tcp_v4_rcv(struct sk_buff *skb) > > refcounted = false; > > goto process; > > } > > + /* TCP_FLAGS or NO_SOCKET? 
*/ > > + SKB_DR_SET(drop_reason, TCP_FLAGS); > > } > > /* to ACK */ > > fallthrough; > > case TCP_TW_ACK: > > tcp_v4_timewait_ack(sk, skb); > > - break; > > + refcounted = false; > > + if (drop_reason) > > + goto discard_it; > > + else > > + goto put_and_return; > > case TCP_TW_RST: > > tcp_v4_send_reset(sk, skb); > > inet_twsk_deschedule_put(inet_twsk(sk)); > > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c > > index 1a21018f6f64..329724118b7f 100644 > > --- a/net/ipv4/tcp_minisocks.c > > +++ b/net/ipv4/tcp_minisocks.c > > @@ -83,13 +83,15 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, > > */ > > enum tcp_tw_status > > tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > - const struct tcphdr *th) > > + const struct tcphdr *th, > > + enum skb_drop_reason *reason) > > { > > struct tcp_options_received tmp_opt; > > struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); > > bool paws_reject = false; > > > > tmp_opt.saw_tstamp = 0; > > + *reason = SKB_DROP_REASON_NOT_SPECIFIED; > > if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { > > tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); > > > > @@ -113,11 +115,16 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > return tcp_timewait_check_oow_rate_limit( > > tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); > > > > - if (th->rst) > > + if (th->rst) { > > + SKB_DR_SET(*reason, TCP_RESET); > > goto kill; > > + } > > > > - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) > > + if (th->syn && !before(TCP_SKB_CB(skb)->seq, > > + tcptw->tw_rcv_nxt)) { > > + SKB_DR_SET(*reason, TCP_FLAGS); > > return TCP_TW_RST; > > + } > > > > /* Dup ACK? */ > > if (!th->ack || > > @@ -143,6 +150,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > } > > > > inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); > > + > > + /* skb should be free normally on this case. 
*/ > > + *reason = SKB_NOT_DROPPED_YET; > > return TCP_TW_ACK; > > } > > > > @@ -174,6 +184,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > * protocol bug yet. > > */ > > if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { > > + SKB_DR_SET(*reason, TCP_RESET); > > kill: > > inet_twsk_deschedule_put(tw); > > return TCP_TW_SUCCESS; > > @@ -216,11 +227,14 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > if (isn == 0) > > isn++; > > TCP_SKB_CB(skb)->tcp_tw_isn = isn; > > + *reason = SKB_NOT_DROPPED_YET; > > return TCP_TW_SYN; > > } > > > > - if (paws_reject) > > + if (paws_reject) { > > + SKB_DR_SET(*reason, TCP_RFC7323_PAWS); > > __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); > > + } > > > > if (!th->rst) { > > /* In this case we must reset the TIMEWAIT timer. > > @@ -232,9 +246,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, > > if (paws_reject || th->ack) > > inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); > > > > + SKB_DR_OR(*reason, TIMEWAIT); > > return tcp_timewait_check_oow_rate_limit( > > tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); > > } > > + SKB_DR_SET(*reason, TCP_RESET); > > inet_twsk_put(tw); > > return TCP_TW_SUCCESS; > > } > > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c > > index 27c51991bd54..5c777006de3d 100644 > > --- a/net/ipv6/tcp_ipv6.c > > +++ b/net/ipv6/tcp_ipv6.c > > @@ -1795,7 +1795,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) > > goto csum_error; > > } > > > > - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { > > + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, > > + &drop_reason)) { > > case TCP_TW_SYN: > > { > > struct sock *sk2; > > @@ -1815,12 +1816,17 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) > > refcounted = false; > > goto process; > > } > > + SKB_DR_SET(drop_reason, TCP_FLAGS); > > } > > /* to ACK */ > > fallthrough; > > case TCP_TW_ACK: > 
> tcp_v6_timewait_ack(sk, skb); > > - break; > > + refcounted = false; > > + if (drop_reason) > > + goto discard_it; > > + else > > + goto put_and_return; > > My brain exploded. > I guarantee you that whoever is going to look at this code in one year > will be completely lost. > Adding so much complexity is crazy. > Yeah, I almost got lost myself while adding this code. > About: refcounted = false ; > > This is not going to be used if "goto discard_it;" is taken. > > If the "goto put_and_return;" is taken, this boils down to: > > return ret ? -1 : 0; > > Are you sure ret is even initialized at this point ? > About this part, it was my mistake. The correct code should be: case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); - break; + if (drop_reason) { + goto discard_it; + } else { + consume_skb(skb); /* free skb here */ + return 0; + } As you said, it's a little complex. > Why are we doing this ? We were doing "return 0;" > > Also, where is skb freed ? > > Are you calling tcp_v6_timewait_ack(sk, skb) after skb has been freed ??? > > > > > > case TCP_TW_RST: > > tcp_v6_send_reset(sk, skb); > > inet_twsk_deschedule_put(inet_twsk(sk)); > > -- > > 2.36.1 > >
Hi, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on net-next/master] url: https://github.com/intel-lab-lkp/linux/commits/menglong8-dong-gmail-com/net-tcp-add-skb-drop-reasons-to-tcp-state-change/20220516-114934 base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git d9713088158b23973266e07fdc85ff7d68791a8c config: mips-mtx1_defconfig (https://download.01.org/0day-ci/archive/20220516/202205162352.OThc1nAw-lkp@intel.com/config) compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 853fa8ee225edf2d0de94b0dcbd31bea916e825e) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install mips cross compiling tool for clang build # apt-get install binutils-mips-linux-gnu # https://github.com/intel-lab-lkp/linux/commit/6a657e07d2943a7df8277769f29624ea28599e09 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review menglong8-dong-gmail-com/net-tcp-add-skb-drop-reasons-to-tcp-state-change/20220516-114934 git checkout 6a657e07d2943a7df8277769f29624ea28599e09 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash net/ipv4/ net/ipv6/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> net/ipv4/tcp_ipv4.c:2161:7: warning: variable 'ret' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] if (drop_reason) ^~~~~~~~~~~ net/ipv4/tcp_ipv4.c:2092:9: note: uninitialized use occurs here return ret; ^~~ net/ipv4/tcp_ipv4.c:2161:3: note: remove the 'if' if its condition is always true if (drop_reason) ^~~~~~~~~~~~~~~~ net/ipv4/tcp_ipv4.c:1926:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 1 warning 
generated. -- >> net/ipv6/tcp_ipv6.c:1825:7: warning: variable 'ret' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] if (drop_reason) ^~~~~~~~~~~ net/ipv6/tcp_ipv6.c:1753:9: note: uninitialized use occurs here return ret ? -1 : 0; ^~~ net/ipv6/tcp_ipv6.c:1825:3: note: remove the 'if' if its condition is always true if (drop_reason) ^~~~~~~~~~~~~~~~ net/ipv6/tcp_ipv6.c:1594:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 1 warning generated. vim +2161 net/ipv4/tcp_ipv4.c 1911 1912 /* 1913 * From tcp_input.c 1914 */ 1915 1916 int tcp_v4_rcv(struct sk_buff *skb) 1917 { 1918 struct net *net = dev_net(skb->dev); 1919 enum skb_drop_reason drop_reason; 1920 int sdif = inet_sdif(skb); 1921 int dif = inet_iif(skb); 1922 const struct iphdr *iph; 1923 const struct tcphdr *th; 1924 bool refcounted; 1925 struct sock *sk; 1926 int ret; 1927 1928 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1929 if (skb->pkt_type != PACKET_HOST) 1930 goto discard_it; 1931 1932 /* Count it even if it's bad */ 1933 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1934 1935 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1936 goto discard_it; 1937 1938 th = (const struct tcphdr *)skb->data; 1939 1940 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1941 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1942 goto bad_packet; 1943 } 1944 if (!pskb_may_pull(skb, th->doff * 4)) 1945 goto discard_it; 1946 1947 /* An explanation is required here, I think. 1948 * Packet length and doff are validated by header prediction, 1949 * provided case of th->doff==0 is eliminated. 1950 * So, we defer the checks. 
*/ 1951 1952 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 1953 goto csum_error; 1954 1955 th = (const struct tcphdr *)skb->data; 1956 iph = ip_hdr(skb); 1957 lookup: 1958 sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, 1959 th->dest, sdif, &refcounted); 1960 if (!sk) 1961 goto no_tcp_socket; 1962 1963 process: 1964 if (sk->sk_state == TCP_TIME_WAIT) 1965 goto do_time_wait; 1966 1967 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1968 struct request_sock *req = inet_reqsk(sk); 1969 bool req_stolen = false; 1970 struct sock *nsk; 1971 1972 sk = req->rsk_listener; 1973 drop_reason = tcp_inbound_md5_hash(sk, skb, 1974 &iph->saddr, &iph->daddr, 1975 AF_INET, dif, sdif); 1976 if (unlikely(drop_reason)) { 1977 sk_drops_add(sk, skb); 1978 reqsk_put(req); 1979 goto discard_it; 1980 } 1981 if (tcp_checksum_complete(skb)) { 1982 reqsk_put(req); 1983 goto csum_error; 1984 } 1985 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1986 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1987 if (!nsk) { 1988 inet_csk_reqsk_queue_drop_and_put(sk, req); 1989 goto lookup; 1990 } 1991 sk = nsk; 1992 /* reuseport_migrate_sock() has already held one sk_refcnt 1993 * before returning. 1994 */ 1995 } else { 1996 /* We own a reference on the listener, increase it again 1997 * as we might lose it too soon. 1998 */ 1999 sock_hold(sk); 2000 } 2001 refcounted = true; 2002 nsk = NULL; 2003 if (!tcp_filter(sk, skb)) { 2004 th = (const struct tcphdr *)skb->data; 2005 iph = ip_hdr(skb); 2006 tcp_v4_fill_cb(skb, iph, th); 2007 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 2008 } else { 2009 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 2010 } 2011 if (!nsk) { 2012 reqsk_put(req); 2013 if (req_stolen) { 2014 /* Another cpu got exclusive access to req 2015 * and created a full blown socket. 2016 * Try to feed this packet to this socket 2017 * instead of discarding it. 
2018 */ 2019 tcp_v4_restore_cb(skb); 2020 sock_put(sk); 2021 goto lookup; 2022 } 2023 goto discard_and_relse; 2024 } 2025 if (nsk == sk) { 2026 reqsk_put(req); 2027 tcp_v4_restore_cb(skb); 2028 } else { 2029 drop_reason = tcp_child_process(sk, nsk, skb); 2030 if (drop_reason) { 2031 tcp_v4_send_reset(nsk, skb); 2032 goto discard_and_relse; 2033 } else { 2034 sock_put(sk); 2035 return 0; 2036 } 2037 } 2038 } 2039 2040 if (static_branch_unlikely(&ip4_min_ttl)) { 2041 /* min_ttl can be changed concurrently from do_ip_setsockopt() */ 2042 if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) { 2043 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 2044 goto discard_and_relse; 2045 } 2046 } 2047 2048 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 2049 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 2050 goto discard_and_relse; 2051 } 2052 2053 drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, 2054 &iph->daddr, AF_INET, dif, sdif); 2055 if (drop_reason) 2056 goto discard_and_relse; 2057 2058 nf_reset_ct(skb); 2059 2060 if (tcp_filter(sk, skb)) { 2061 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 2062 goto discard_and_relse; 2063 } 2064 th = (const struct tcphdr *)skb->data; 2065 iph = ip_hdr(skb); 2066 tcp_v4_fill_cb(skb, iph, th); 2067 2068 skb->dev = NULL; 2069 2070 if (sk->sk_state == TCP_LISTEN) { 2071 ret = tcp_v4_do_rcv(sk, skb); 2072 goto put_and_return; 2073 } 2074 2075 sk_incoming_cpu_update(sk); 2076 2077 bh_lock_sock_nested(sk); 2078 tcp_segs_in(tcp_sk(sk), skb); 2079 ret = 0; 2080 if (!sock_owned_by_user(sk)) { 2081 ret = tcp_v4_do_rcv(sk, skb); 2082 } else { 2083 if (tcp_add_backlog(sk, skb, &drop_reason)) 2084 goto discard_and_relse; 2085 } 2086 bh_unlock_sock(sk); 2087 2088 put_and_return: 2089 if (refcounted) 2090 sock_put(sk); 2091 2092 return ret; 2093 2094 no_tcp_socket: 2095 drop_reason = SKB_DROP_REASON_NO_SOCKET; 2096 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 2097 goto discard_it; 2098 2099 tcp_v4_fill_cb(skb, iph, th); 2100 2101 if 
(tcp_checksum_complete(skb)) { 2102 csum_error: 2103 drop_reason = SKB_DROP_REASON_TCP_CSUM; 2104 trace_tcp_bad_csum(skb); 2105 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 2106 bad_packet: 2107 __TCP_INC_STATS(net, TCP_MIB_INERRS); 2108 } else { 2109 tcp_v4_send_reset(NULL, skb); 2110 } 2111 2112 discard_it: 2113 /* Discard frame. */ 2114 kfree_skb_reason(skb, drop_reason); 2115 return 0; 2116 2117 discard_and_relse: 2118 sk_drops_add(sk, skb); 2119 if (refcounted) 2120 sock_put(sk); 2121 goto discard_it; 2122 2123 do_time_wait: 2124 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 2125 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 2126 inet_twsk_put(inet_twsk(sk)); 2127 goto discard_it; 2128 } 2129 2130 tcp_v4_fill_cb(skb, iph, th); 2131 2132 if (tcp_checksum_complete(skb)) { 2133 inet_twsk_put(inet_twsk(sk)); 2134 goto csum_error; 2135 } 2136 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, 2137 &drop_reason)) { 2138 case TCP_TW_SYN: { 2139 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 2140 &tcp_hashinfo, skb, 2141 __tcp_hdrlen(th), 2142 iph->saddr, th->source, 2143 iph->daddr, th->dest, 2144 inet_iif(skb), 2145 sdif); 2146 if (sk2) { 2147 inet_twsk_deschedule_put(inet_twsk(sk)); 2148 sk = sk2; 2149 tcp_v4_restore_cb(skb); 2150 refcounted = false; 2151 goto process; 2152 } 2153 /* TCP_FLAGS or NO_SOCKET? */ 2154 SKB_DR_SET(drop_reason, TCP_FLAGS); 2155 } 2156 /* to ACK */ 2157 fallthrough; 2158 case TCP_TW_ACK: 2159 tcp_v4_timewait_ack(sk, skb); 2160 refcounted = false; > 2161 if (drop_reason) 2162 goto discard_it; 2163 else 2164 goto put_and_return; 2165 case TCP_TW_RST: 2166 tcp_v4_send_reset(sk, skb); 2167 inet_twsk_deschedule_put(inet_twsk(sk)); 2168 goto discard_it; 2169 case TCP_TW_SUCCESS:; 2170 } 2171 goto discard_it; 2172 } 2173
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4578bbab5a3e..8d18fc5a5af6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -560,6 +560,10 @@ struct sk_buff; * SKB_DROP_REASON_TCP_REQQFULLDROP * request queue of the listen socket is full, corresponding to * LINUX_MIB_TCPREQQFULLDROP + * + * SKB_DROP_REASON_TIMEWAIT + * socket is in time-wait state and all packet that received will + * be treated as 'drop', except a good 'SYN' packet */ #define __DEFINE_SKB_DROP_REASON(FN) \ FN(NOT_SPECIFIED) \ @@ -631,6 +635,7 @@ struct sk_buff; FN(TCP_ABORTONDATA) \ FN(LISTENOVERFLOWS) \ FN(TCP_REQQFULLDROP) \ + FN(TIMEWAIT) \ FN(MAX) /* The reason of skb drop, which is used in kfree_skb_reason(). diff --git a/include/net/tcp.h b/include/net/tcp.h index 082dd0627e2e..88217b8d95ac 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -380,9 +380,10 @@ enum tcp_tw_status { }; -enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, - struct sk_buff *skb, - const struct tcphdr *th); +enum tcp_tw_status +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, + const struct tcphdr *th, + enum skb_drop_reason *reason); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 708f92b03f42..9174ee162633 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2134,7 +2134,8 @@ int tcp_v4_rcv(struct sk_buff *skb) inet_twsk_put(inet_twsk(sk)); goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, + &drop_reason)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, skb, @@ -2150,12 +2151,18 @@ int tcp_v4_rcv(struct sk_buff *skb) refcounted = false; goto process; } + /* TCP_FLAGS or NO_SOCKET? 
*/ + SKB_DR_SET(drop_reason, TCP_FLAGS); } /* to ACK */ fallthrough; case TCP_TW_ACK: tcp_v4_timewait_ack(sk, skb); - break; + refcounted = false; + if (drop_reason) + goto discard_it; + else + goto put_and_return; case TCP_TW_RST: tcp_v4_send_reset(sk, skb); inet_twsk_deschedule_put(inet_twsk(sk)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1a21018f6f64..329724118b7f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -83,13 +83,15 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, */ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - const struct tcphdr *th) + const struct tcphdr *th, + enum skb_drop_reason *reason) { struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; + *reason = SKB_DROP_REASON_NOT_SPECIFIED; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL); @@ -113,11 +115,16 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); - if (th->rst) + if (th->rst) { + SKB_DR_SET(*reason, TCP_RESET); goto kill; + } - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) + if (th->syn && !before(TCP_SKB_CB(skb)->seq, + tcptw->tw_rcv_nxt)) { + SKB_DR_SET(*reason, TCP_FLAGS); return TCP_TW_RST; + } /* Dup ACK? */ if (!th->ack || @@ -143,6 +150,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, } inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + + /* skb should be free normally on this case. */ + *reason = SKB_NOT_DROPPED_YET; return TCP_TW_ACK; } @@ -174,6 +184,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * protocol bug yet. 
*/ if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { + SKB_DR_SET(*reason, TCP_RESET); kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; @@ -216,11 +227,14 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (isn == 0) isn++; TCP_SKB_CB(skb)->tcp_tw_isn = isn; + *reason = SKB_NOT_DROPPED_YET; return TCP_TW_SYN; } - if (paws_reject) + if (paws_reject) { + SKB_DR_SET(*reason, TCP_RFC7323_PAWS); __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED); + } if (!th->rst) { /* In this case we must reset the TIMEWAIT timer. @@ -232,9 +246,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (paws_reject || th->ack) inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + SKB_DR_OR(*reason, TIMEWAIT); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } + SKB_DR_SET(*reason, TCP_RESET); inet_twsk_put(tw); return TCP_TW_SUCCESS; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 27c51991bd54..5c777006de3d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1795,7 +1795,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { + switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, + &drop_reason)) { case TCP_TW_SYN: { struct sock *sk2; @@ -1815,12 +1816,17 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) refcounted = false; goto process; } + SKB_DR_SET(drop_reason, TCP_FLAGS); } /* to ACK */ fallthrough; case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); - break; + refcounted = false; + if (drop_reason) + goto discard_it; + else + goto put_and_return; case TCP_TW_RST: tcp_v6_send_reset(sk, skb); inet_twsk_deschedule_put(inet_twsk(sk));