diff mbox series

[net-next,v3,7/9] net: tcp: add skb drop reasons to tcp tw code path

Message ID 20220610034204.67901-8-imagedong@tencent.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: tcp: add skb drop reasons to tcp state change | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5651 this patch: 5651
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1213 this patch: 1213
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5800 this patch: 5800
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 152 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Menglong Dong June 10, 2022, 3:42 a.m. UTC
From: Menglong Dong <imagedong@tencent.com>

In order to get the reasons of skb drops, add a function argument of
type 'enum skb_drop_reason *reason' to tcp_timewait_state_process().

In the origin code, all packets to time-wait socket are treated as
dropping with kfree_skb(), which can make users confused. Therefore,
we use consume_skb() for the skbs that are 'good'. We can check the
value of 'reason' to decide use kfree_skb() or consume_skb().

The new reason 'TIMEWAIT' is added for the case that the skb is dropped
as the socket in time-wait state.

Reviewed-by: Jiang Biao <benbjiang@tencent.com>
Reviewed-by: Hao Peng <flyingpeng@tencent.com>
Signed-off-by: Menglong Dong <imagedong@tencent.com>
Reported-by: Eric Dumazet <edumazet@google.com>
---
v2:
- skb is not freed on TCP_TW_ACK and 'ret' is not initizalized, fix
  it (Eric Dumazet)
---
 include/net/dropreason.h |  6 ++++++
 include/net/tcp.h        |  7 ++++---
 net/ipv4/tcp_ipv4.c      |  9 ++++++++-
 net/ipv4/tcp_minisocks.c | 24 ++++++++++++++++++++----
 net/ipv6/tcp_ipv6.c      |  8 +++++++-
 5 files changed, 45 insertions(+), 9 deletions(-)

Comments

Eric Dumazet June 10, 2022, 9:04 a.m. UTC | #1
On Thu, Jun 9, 2022 at 8:45 PM <menglong8.dong@gmail.com> wrote:
>
> From: Menglong Dong <imagedong@tencent.com>
>
> In order to get the reasons of skb drops, add a function argument of
> type 'enum skb_drop_reason *reason' to tcp_timewait_state_process().
>
> In the origin code, all packets to time-wait socket are treated as
> dropping with kfree_skb(), which can make users confused. Therefore,
> we use consume_skb() for the skbs that are 'good'. We can check the
> value of 'reason' to decide use kfree_skb() or consume_skb().
>
> The new reason 'TIMEWAIT' is added for the case that the skb is dropped
> as the socket in time-wait state.
>
> Reviewed-by: Jiang Biao <benbjiang@tencent.com>
> Reviewed-by: Hao Peng <flyingpeng@tencent.com>
> Signed-off-by: Menglong Dong <imagedong@tencent.com>
> Reported-by: Eric Dumazet <edumazet@google.com>

I have suggested that consume_skb() could be an alias of
kfree_skb_reason(skb, SKB_NOT_DROPPED),
or something like that.

In order to avoid silly constructs all over the places :

if (reason)
   kfree_skb_reason(skb, reason);
else
  consume_skb(skb);

->

kfree_skb_reason(skb, reason);

> ---
> v2:
> - skb is not freed on TCP_TW_ACK and 'ret' is not initizalized, fix
>   it (Eric Dumazet)
> ---
>  include/net/dropreason.h |  6 ++++++
>  include/net/tcp.h        |  7 ++++---
>  net/ipv4/tcp_ipv4.c      |  9 ++++++++-
>  net/ipv4/tcp_minisocks.c | 24 ++++++++++++++++++++----
>  net/ipv6/tcp_ipv6.c      |  8 +++++++-
>  5 files changed, 45 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/dropreason.h b/include/net/dropreason.h
> index 86e89d3022b5..3c6f8e0f7f16 100644
> --- a/include/net/dropreason.h
> +++ b/include/net/dropreason.h
> @@ -258,6 +258,12 @@ enum skb_drop_reason {
>          * socket is full, corresponding to LINUX_MIB_TCPREQQFULLDROP
>          */
>         SKB_DROP_REASON_TCP_REQQFULLDROP,
> +       /**
> +        * @SKB_DROP_REASON_TIMEWAIT: socket is in time-wait state and all
> +        * packet that received will be treated as 'drop', except a good
> +        * 'SYN' packet
> +        */
> +       SKB_DROP_REASON_TIMEWAIT,
>         /**
>          * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
>          * used as a real 'reason'
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 082dd0627e2e..88217b8d95ac 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -380,9 +380,10 @@ enum tcp_tw_status {
>  };
>
>
> -enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
> -                                             struct sk_buff *skb,
> -                                             const struct tcphdr *th);
> +enum tcp_tw_status
> +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> +                          const struct tcphdr *th,
> +                          enum skb_drop_reason *reason);
>  struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
>                            struct request_sock *req, bool fastopen,
>                            bool *lost_race);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 804fc5e0203e..e7bd2f410a4a 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -2134,7 +2134,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
>                 inet_twsk_put(inet_twsk(sk));
>                 goto csum_error;
>         }
> -       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
> +       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th,
> +                                          &drop_reason)) {
>         case TCP_TW_SYN: {
>                 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
>                                                         &tcp_hashinfo, skb,
> @@ -2150,11 +2151,17 @@ int tcp_v4_rcv(struct sk_buff *skb)
>                         refcounted = false;
>                         goto process;
>                 }
> +               /* TCP_FLAGS or NO_SOCKET? */
> +               SKB_DR_SET(drop_reason, TCP_FLAGS);
>         }
>                 /* to ACK */
>                 fallthrough;
>         case TCP_TW_ACK:
>                 tcp_v4_timewait_ack(sk, skb);
> +               if (!drop_reason) {
> +                       consume_skb(skb);
> +                       return 0;
> +               }

Sorry, this is the kind of change which makes the whole exercise
source of numerous problems later
when backports are needed.

You are sending patches today, but we are not sure you will be willing
to spend days of work and tests to validate
future backports with conflicts.

>                 break;
>         case TCP_TW_RST:
>                 tcp_v4_send_reset(sk, skb);
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index 1a21018f6f64..329724118b7f 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -83,13 +83,15 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
>   */
>  enum tcp_tw_status
>  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> -                          const struct tcphdr *th)
> +                          const struct tcphdr *th,
> +                          enum skb_drop_reason *reason)
>  {
>         struct tcp_options_received tmp_opt;
>         struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
>         bool paws_reject = false;
>
>         tmp_opt.saw_tstamp = 0;
> +       *reason = SKB_DROP_REASON_NOT_SPECIFIED;
>         if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
>                 tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
>
> @@ -113,11 +115,16 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>                         return tcp_timewait_check_oow_rate_limit(
>                                 tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
>
> -               if (th->rst)
> +               if (th->rst) {
> +                       SKB_DR_SET(*reason, TCP_RESET);
>                         goto kill;
> +               }
>
> -               if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
> +               if (th->syn && !before(TCP_SKB_CB(skb)->seq,
> +                                      tcptw->tw_rcv_nxt)) {
> +                       SKB_DR_SET(*reason, TCP_FLAGS);
>                         return TCP_TW_RST;
> +               }
>
>                 /* Dup ACK? */
>                 if (!th->ack ||
> @@ -143,6 +150,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>                 }
>
>                 inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
> +
> +               /* skb should be free normally on this case. */
> +               *reason = SKB_NOT_DROPPED_YET;
>                 return TCP_TW_ACK;
>         }
>
> @@ -174,6 +184,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>                          * protocol bug yet.
>                          */
>                         if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
> +                               SKB_DR_SET(*reason, TCP_RESET);
>  kill:
>                                 inet_twsk_deschedule_put(tw);
>                                 return TCP_TW_SUCCESS;
> @@ -216,11 +227,14 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>                 if (isn == 0)
>                         isn++;
>                 TCP_SKB_CB(skb)->tcp_tw_isn = isn;
> +               *reason = SKB_NOT_DROPPED_YET;
>                 return TCP_TW_SYN;
>         }
>
> -       if (paws_reject)
> +       if (paws_reject) {
> +               SKB_DR_SET(*reason, TCP_RFC7323_PAWS);
>                 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
> +       }
>
>         if (!th->rst) {
>                 /* In this case we must reset the TIMEWAIT timer.
> @@ -232,9 +246,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
>                 if (paws_reject || th->ack)
>                         inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
>
> +               SKB_DR_OR(*reason, TIMEWAIT);
>                 return tcp_timewait_check_oow_rate_limit(
>                         tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
>         }
> +       SKB_DR_SET(*reason, TCP_RESET);
>         inet_twsk_put(tw);
>         return TCP_TW_SUCCESS;
>  }
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 0d2ba9d90529..41551a3b679b 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1795,7 +1795,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
>                 goto csum_error;
>         }
>
> -       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
> +       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th,
> +                                          &drop_reason)) {
>         case TCP_TW_SYN:
>         {
>                 struct sock *sk2;
> @@ -1815,11 +1816,16 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
>                         refcounted = false;
>                         goto process;
>                 }
> +               SKB_DR_SET(drop_reason, TCP_FLAGS);
>         }
>                 /* to ACK */
>                 fallthrough;
>         case TCP_TW_ACK:
>                 tcp_v6_timewait_ack(sk, skb);
> +               if (!drop_reason) {
> +                       consume_skb(skb);
> +                       return 0;
> +               }
>                 break;
>         case TCP_TW_RST:
>                 tcp_v6_send_reset(sk, skb);
> --
> 2.36.1
>
Menglong Dong June 13, 2022, 2:41 a.m. UTC | #2
On Fri, Jun 10, 2022 at 5:04 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Jun 9, 2022 at 8:45 PM <menglong8.dong@gmail.com> wrote:
> >
> > From: Menglong Dong <imagedong@tencent.com>
> >
> > In order to get the reasons of skb drops, add a function argument of
> > type 'enum skb_drop_reason *reason' to tcp_timewait_state_process().
> >
> > In the origin code, all packets to time-wait socket are treated as
> > dropping with kfree_skb(), which can make users confused. Therefore,
> > we use consume_skb() for the skbs that are 'good'. We can check the
> > value of 'reason' to decide use kfree_skb() or consume_skb().
> >
> > The new reason 'TIMEWAIT' is added for the case that the skb is dropped
> > as the socket in time-wait state.
> >
> > Reviewed-by: Jiang Biao <benbjiang@tencent.com>
> > Reviewed-by: Hao Peng <flyingpeng@tencent.com>
> > Signed-off-by: Menglong Dong <imagedong@tencent.com>
> > Reported-by: Eric Dumazet <edumazet@google.com>
>
> I have suggested that consume_skb() could be an alias of
> kfree_skb_reason(skb, SKB_NOT_DROPPED),
> or something like that.
>
> In order to avoid silly constructs all over the places :
>
> if (reason)
>    kfree_skb_reason(skb, reason);
> else
>   consume_skb(skb);
>
> ->
>
> kfree_skb_reason(skb, reason);
>

This seems to be a good idea in some cases. However, I
am a little worried about that SKB_NOT_DROPPED can
be passed to kfree_skb_reason() by mistake, which happened
before. (Maybe I'm just overly worried, as this can be avoided
during we coding)

> > ---
> > v2:
> > - skb is not freed on TCP_TW_ACK and 'ret' is not initizalized, fix
> >   it (Eric Dumazet)
> > ---
> >  include/net/dropreason.h |  6 ++++++
> >  include/net/tcp.h        |  7 ++++---
> >  net/ipv4/tcp_ipv4.c      |  9 ++++++++-
> >  net/ipv4/tcp_minisocks.c | 24 ++++++++++++++++++++----
> >  net/ipv6/tcp_ipv6.c      |  8 +++++++-
> >  5 files changed, 45 insertions(+), 9 deletions(-)
> >
> > diff --git a/include/net/dropreason.h b/include/net/dropreason.h
> > index 86e89d3022b5..3c6f8e0f7f16 100644
> > --- a/include/net/dropreason.h
> > +++ b/include/net/dropreason.h
> > @@ -258,6 +258,12 @@ enum skb_drop_reason {
> >          * socket is full, corresponding to LINUX_MIB_TCPREQQFULLDROP
> >          */
> >         SKB_DROP_REASON_TCP_REQQFULLDROP,
> > +       /**
> > +        * @SKB_DROP_REASON_TIMEWAIT: socket is in time-wait state and all
> > +        * packet that received will be treated as 'drop', except a good
> > +        * 'SYN' packet
> > +        */
> > +       SKB_DROP_REASON_TIMEWAIT,
> >         /**
> >          * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
> >          * used as a real 'reason'
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index 082dd0627e2e..88217b8d95ac 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -380,9 +380,10 @@ enum tcp_tw_status {
> >  };
> >
> >
> > -enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
> > -                                             struct sk_buff *skb,
> > -                                             const struct tcphdr *th);
> > +enum tcp_tw_status
> > +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> > +                          const struct tcphdr *th,
> > +                          enum skb_drop_reason *reason);
> >  struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
> >                            struct request_sock *req, bool fastopen,
> >                            bool *lost_race);
> > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > index 804fc5e0203e..e7bd2f410a4a 100644
> > --- a/net/ipv4/tcp_ipv4.c
> > +++ b/net/ipv4/tcp_ipv4.c
> > @@ -2134,7 +2134,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
> >                 inet_twsk_put(inet_twsk(sk));
> >                 goto csum_error;
> >         }
> > -       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
> > +       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th,
> > +                                          &drop_reason)) {
> >         case TCP_TW_SYN: {
> >                 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
> >                                                         &tcp_hashinfo, skb,
> > @@ -2150,11 +2151,17 @@ int tcp_v4_rcv(struct sk_buff *skb)
> >                         refcounted = false;
> >                         goto process;
> >                 }
> > +               /* TCP_FLAGS or NO_SOCKET? */
> > +               SKB_DR_SET(drop_reason, TCP_FLAGS);
> >         }
> >                 /* to ACK */
> >                 fallthrough;
> >         case TCP_TW_ACK:
> >                 tcp_v4_timewait_ack(sk, skb);
> > +               if (!drop_reason) {
> > +                       consume_skb(skb);
> > +                       return 0;
> > +               }
>
> Sorry, this is the kind of change which makes the whole exercise
> source of numerous problems later
> when backports are needed.
>
> You are sending patches today, but we are not sure you will be willing
> to spend days of work and tests to validate
> future backports with conflicts.

With making kfree_skb_reason(skb, SKB_NOT_DROPPED) consume_skb(skb),
it seems that the code here doesn't need to be changed anymore.


>
> >                 break;
> >         case TCP_TW_RST:
> >                 tcp_v4_send_reset(sk, skb);
> > diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> > index 1a21018f6f64..329724118b7f 100644
> > --- a/net/ipv4/tcp_minisocks.c
> > +++ b/net/ipv4/tcp_minisocks.c
> > @@ -83,13 +83,15 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
> >   */
> >  enum tcp_tw_status
> >  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> > -                          const struct tcphdr *th)
> > +                          const struct tcphdr *th,
> > +                          enum skb_drop_reason *reason)
> >  {
> >         struct tcp_options_received tmp_opt;
> >         struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
> >         bool paws_reject = false;
> >
> >         tmp_opt.saw_tstamp = 0;
> > +       *reason = SKB_DROP_REASON_NOT_SPECIFIED;
> >         if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
> >                 tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
> >
> > @@ -113,11 +115,16 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> >                         return tcp_timewait_check_oow_rate_limit(
> >                                 tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
> >
> > -               if (th->rst)
> > +               if (th->rst) {
> > +                       SKB_DR_SET(*reason, TCP_RESET);
> >                         goto kill;
> > +               }
> >
> > -               if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
> > +               if (th->syn && !before(TCP_SKB_CB(skb)->seq,
> > +                                      tcptw->tw_rcv_nxt)) {
> > +                       SKB_DR_SET(*reason, TCP_FLAGS);
> >                         return TCP_TW_RST;
> > +               }
> >
> >                 /* Dup ACK? */
> >                 if (!th->ack ||
> > @@ -143,6 +150,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> >                 }
> >
> >                 inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
> > +
> > +               /* skb should be free normally on this case. */
> > +               *reason = SKB_NOT_DROPPED_YET;
> >                 return TCP_TW_ACK;
> >         }
> >
> > @@ -174,6 +184,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> >                          * protocol bug yet.
> >                          */
> >                         if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
> > +                               SKB_DR_SET(*reason, TCP_RESET);
> >  kill:
> >                                 inet_twsk_deschedule_put(tw);
> >                                 return TCP_TW_SUCCESS;
> > @@ -216,11 +227,14 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> >                 if (isn == 0)
> >                         isn++;
> >                 TCP_SKB_CB(skb)->tcp_tw_isn = isn;
> > +               *reason = SKB_NOT_DROPPED_YET;
> >                 return TCP_TW_SYN;
> >         }
> >
> > -       if (paws_reject)
> > +       if (paws_reject) {
> > +               SKB_DR_SET(*reason, TCP_RFC7323_PAWS);
> >                 __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
> > +       }
> >
> >         if (!th->rst) {
> >                 /* In this case we must reset the TIMEWAIT timer.
> > @@ -232,9 +246,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
> >                 if (paws_reject || th->ack)
> >                         inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
> >
> > +               SKB_DR_OR(*reason, TIMEWAIT);
> >                 return tcp_timewait_check_oow_rate_limit(
> >                         tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
> >         }
> > +       SKB_DR_SET(*reason, TCP_RESET);
> >         inet_twsk_put(tw);
> >         return TCP_TW_SUCCESS;
> >  }
> > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> > index 0d2ba9d90529..41551a3b679b 100644
> > --- a/net/ipv6/tcp_ipv6.c
> > +++ b/net/ipv6/tcp_ipv6.c
> > @@ -1795,7 +1795,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
> >                 goto csum_error;
> >         }
> >
> > -       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
> > +       switch (tcp_timewait_state_process(inet_twsk(sk), skb, th,
> > +                                          &drop_reason)) {
> >         case TCP_TW_SYN:
> >         {
> >                 struct sock *sk2;
> > @@ -1815,11 +1816,16 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
> >                         refcounted = false;
> >                         goto process;
> >                 }
> > +               SKB_DR_SET(drop_reason, TCP_FLAGS);
> >         }
> >                 /* to ACK */
> >                 fallthrough;
> >         case TCP_TW_ACK:
> >                 tcp_v6_timewait_ack(sk, skb);
> > +               if (!drop_reason) {
> > +                       consume_skb(skb);
> > +                       return 0;
> > +               }
> >                 break;
> >         case TCP_TW_RST:
> >                 tcp_v6_send_reset(sk, skb);
> > --
> > 2.36.1
> >
diff mbox series

Patch

diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index 86e89d3022b5..3c6f8e0f7f16 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -258,6 +258,12 @@  enum skb_drop_reason {
 	 * socket is full, corresponding to LINUX_MIB_TCPREQQFULLDROP
 	 */
 	SKB_DROP_REASON_TCP_REQQFULLDROP,
+	/**
+	 * @SKB_DROP_REASON_TIMEWAIT: socket is in time-wait state and all
+	 * packet that received will be treated as 'drop', except a good
+	 * 'SYN' packet
+	 */
+	SKB_DROP_REASON_TIMEWAIT,
 	/**
 	 * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
 	 * used as a real 'reason'
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 082dd0627e2e..88217b8d95ac 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -380,9 +380,10 @@  enum tcp_tw_status {
 };
 
 
-enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
-					      struct sk_buff *skb,
-					      const struct tcphdr *th);
+enum tcp_tw_status
+tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
+			   const struct tcphdr *th,
+			   enum skb_drop_reason *reason);
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req, bool fastopen,
 			   bool *lost_race);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 804fc5e0203e..e7bd2f410a4a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2134,7 +2134,8 @@  int tcp_v4_rcv(struct sk_buff *skb)
 		inet_twsk_put(inet_twsk(sk));
 		goto csum_error;
 	}
-	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th,
+					   &drop_reason)) {
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
 							&tcp_hashinfo, skb,
@@ -2150,11 +2151,17 @@  int tcp_v4_rcv(struct sk_buff *skb)
 			refcounted = false;
 			goto process;
 		}
+		/* TCP_FLAGS or NO_SOCKET? */
+		SKB_DR_SET(drop_reason, TCP_FLAGS);
 	}
 		/* to ACK */
 		fallthrough;
 	case TCP_TW_ACK:
 		tcp_v4_timewait_ack(sk, skb);
+		if (!drop_reason) {
+			consume_skb(skb);
+			return 0;
+		}
 		break;
 	case TCP_TW_RST:
 		tcp_v4_send_reset(sk, skb);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1a21018f6f64..329724118b7f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -83,13 +83,15 @@  tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
  */
 enum tcp_tw_status
 tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
-			   const struct tcphdr *th)
+			   const struct tcphdr *th,
+			   enum skb_drop_reason *reason)
 {
 	struct tcp_options_received tmp_opt;
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	bool paws_reject = false;
 
 	tmp_opt.saw_tstamp = 0;
+	*reason = SKB_DROP_REASON_NOT_SPECIFIED;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
 		tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
 
@@ -113,11 +115,16 @@  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			return tcp_timewait_check_oow_rate_limit(
 				tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
 
-		if (th->rst)
+		if (th->rst) {
+			SKB_DR_SET(*reason, TCP_RESET);
 			goto kill;
+		}
 
-		if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
+		if (th->syn && !before(TCP_SKB_CB(skb)->seq,
+				       tcptw->tw_rcv_nxt)) {
+			SKB_DR_SET(*reason, TCP_FLAGS);
 			return TCP_TW_RST;
+		}
 
 		/* Dup ACK? */
 		if (!th->ack ||
@@ -143,6 +150,9 @@  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		}
 
 		inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
+
+		/* skb should be free normally on this case. */
+		*reason = SKB_NOT_DROPPED_YET;
 		return TCP_TW_ACK;
 	}
 
@@ -174,6 +184,7 @@  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			 * protocol bug yet.
 			 */
 			if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+				SKB_DR_SET(*reason, TCP_RESET);
 kill:
 				inet_twsk_deschedule_put(tw);
 				return TCP_TW_SUCCESS;
@@ -216,11 +227,14 @@  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		if (isn == 0)
 			isn++;
 		TCP_SKB_CB(skb)->tcp_tw_isn = isn;
+		*reason = SKB_NOT_DROPPED_YET;
 		return TCP_TW_SYN;
 	}
 
-	if (paws_reject)
+	if (paws_reject) {
+		SKB_DR_SET(*reason, TCP_RFC7323_PAWS);
 		__NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
+	}
 
 	if (!th->rst) {
 		/* In this case we must reset the TIMEWAIT timer.
@@ -232,9 +246,11 @@  tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		if (paws_reject || th->ack)
 			inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
 
+		SKB_DR_OR(*reason, TIMEWAIT);
 		return tcp_timewait_check_oow_rate_limit(
 			tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
 	}
+	SKB_DR_SET(*reason, TCP_RESET);
 	inet_twsk_put(tw);
 	return TCP_TW_SUCCESS;
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0d2ba9d90529..41551a3b679b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1795,7 +1795,8 @@  INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 		goto csum_error;
 	}
 
-	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th,
+					   &drop_reason)) {
 	case TCP_TW_SYN:
 	{
 		struct sock *sk2;
@@ -1815,11 +1816,16 @@  INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
 			refcounted = false;
 			goto process;
 		}
+		SKB_DR_SET(drop_reason, TCP_FLAGS);
 	}
 		/* to ACK */
 		fallthrough;
 	case TCP_TW_ACK:
 		tcp_v6_timewait_ack(sk, skb);
+		if (!drop_reason) {
+			consume_skb(skb);
+			return 0;
+		}
 		break;
 	case TCP_TW_RST:
 		tcp_v6_send_reset(sk, skb);