diff mbox series

[net-next,2/2] tcp: factorize logic into tcp_epollin_ready()

Message ID 20210212232214.2869897-3-eric.dumazet@gmail.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series tcp: mem pressure vs SO_RCVLOWAT | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 2 maintainers not CCed: yoshfuji@linux-ipv6.org dsahern@kernel.org
netdev/source_inline success Was 1 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 1496 this patch: 1496
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 66 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 1498 this patch: 1498
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Eric Dumazet Feb. 12, 2021, 11:22 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

Both tcp_data_ready() and tcp_stream_is_readable() share the same logic.

Add tcp_epollin_ready() helper to avoid duplication.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Arjun Roy <arjunroy@google.com>
Cc: Wei Wang <weiwan@google.com>
---
 include/net/tcp.h    | 12 ++++++++++++
 net/ipv4/tcp.c       | 16 ++++------------
 net/ipv4/tcp_input.c | 11 ++---------
 3 files changed, 18 insertions(+), 21 deletions(-)

Comments

Wei Wang Feb. 13, 2021, 12:30 a.m. UTC | #1
On Fri, Feb 12, 2021 at 3:22 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> From: Eric Dumazet <edumazet@google.com>
>
> Both tcp_data_ready() and tcp_stream_is_readable() share the same logic.
>
> Add tcp_epollin_ready() helper to avoid duplication.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Arjun Roy <arjunroy@google.com>
> Cc: Wei Wang <weiwan@google.com>
> ---
>  include/net/tcp.h    | 12 ++++++++++++
>  net/ipv4/tcp.c       | 16 ++++------------
>  net/ipv4/tcp_input.c | 11 ++---------
>  3 files changed, 18 insertions(+), 21 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 244208f6f6c2ace87920b633e469421f557427a6..484eb2362645fd478f59b26b42457ecf4510eb14 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1442,6 +1442,18 @@ static inline bool tcp_rmem_pressure(const struct sock *sk)
>         return atomic_read(&sk->sk_rmem_alloc) > threshold;
>  }
>
> +static inline bool tcp_epollin_ready(const struct sock *sk, int target)
> +{
> +       const struct tcp_sock *tp = tcp_sk(sk);
> +       int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
> +
> +       if (avail <= 0)
> +               return false;
> +
> +       return (avail >= target) || tcp_rmem_pressure(sk) ||
> +              (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss);
> +}
> +
>  extern void tcp_openreq_init_rwin(struct request_sock *req,
>                                   const struct sock *sk_listener,
>                                   const struct dst_entry *dst);
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 9896ca10bb340924b779cb6a7606d57fdd5c3357..7a6b58ae408d1fb1e5536ccfed8215be123f3b57 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -481,19 +481,11 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
>         }
>  }
>
> -static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
> -                                         int target, struct sock *sk)
> +static bool tcp_stream_is_readable(struct sock *sk, int target)
>  {
> -       int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
> +       if (tcp_epollin_ready(sk, target))
> +               return true;
>
> -       if (avail > 0) {
> -               if (avail >= target)
> -                       return true;
> -               if (tcp_rmem_pressure(sk))
> -                       return true;
> -               if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
> -                       return true;
> -       }
>         if (sk->sk_prot->stream_memory_read)
>                 return sk->sk_prot->stream_memory_read(sk);
>         return false;
> @@ -568,7 +560,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
>                     tp->urg_data)
>                         target++;
>
> -               if (tcp_stream_is_readable(tp, target, sk))
> +               if (tcp_stream_is_readable(sk, target))
>                         mask |= EPOLLIN | EPOLLRDNORM;
>
>                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a8f8f98159531e5d1c80660972148986f6acd20a..e32a7056cb7640c67ef2d6a4d9484684d2602fcd 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4924,15 +4924,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
>
>  void tcp_data_ready(struct sock *sk)
>  {
> -       const struct tcp_sock *tp = tcp_sk(sk);
> -       int avail = tp->rcv_nxt - tp->copied_seq;
> -
> -       if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
> -           !sock_flag(sk, SOCK_DONE) &&

Seems "!sock_flag(sk, SOCK_DONE)" is not checked in
tcp_epollin_read(). Does it matter?

> -           tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
> -               return;
> -
> -       sk->sk_data_ready(sk);
> +       if (tcp_epollin_ready(sk, sk->sk_rcvlowat))
> +               sk->sk_data_ready(sk);
>  }
>
>  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
> --
> 2.30.0.478.g8a0d178c01-goog
>
Eric Dumazet Feb. 13, 2021, 7:50 a.m. UTC | #2
On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote:
>

> >  void tcp_data_ready(struct sock *sk)
> >  {
> > -       const struct tcp_sock *tp = tcp_sk(sk);
> > -       int avail = tp->rcv_nxt - tp->copied_seq;
> > -
> > -       if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
> > -           !sock_flag(sk, SOCK_DONE) &&
>
> Seems "!sock_flag(sk, SOCK_DONE)" is not checked in
> tcp_epollin_read(). Does it matter?
>


Yes, probably, good catch.

Not sure where tcp_poll() gets this, I have to double check.

Thanks !
Eric Dumazet Feb. 13, 2021, 8:04 a.m. UTC | #3
On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote:
> >
>
> > >  void tcp_data_ready(struct sock *sk)
> > >  {
> > > -       const struct tcp_sock *tp = tcp_sk(sk);
> > > -       int avail = tp->rcv_nxt - tp->copied_seq;
> > > -
> > > -       if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
> > > -           !sock_flag(sk, SOCK_DONE) &&
> >
> > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in
> > tcp_epollin_read(). Does it matter?
> >
>
>
> Yes, probably, good catch.
>
> Not sure where tcp_poll() gets this, I have to double check.

It gets the info from sk->sk_hutdown & RCV_SHUTDOWN

tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and
sock_set_flag(sk, SOCK_DONE);

This seems to suggest tcp_fin() could call sk->sk_data_ready() so that
we do not have to test for this unlikely condition in tcp_data_ready()
Arjun Roy Feb. 13, 2021, 5:10 p.m. UTC | #4
On Sat, Feb 13, 2021 at 12:05 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote:
> > >
> >
> > > >  void tcp_data_ready(struct sock *sk)
> > > >  {
> > > > -       const struct tcp_sock *tp = tcp_sk(sk);
> > > > -       int avail = tp->rcv_nxt - tp->copied_seq;
> > > > -
> > > > -       if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
> > > > -           !sock_flag(sk, SOCK_DONE) &&
> > >
> > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in
> > > tcp_epollin_read(). Does it matter?
> > >
> >
> >
> > Yes, probably, good catch.
> >
> > Not sure where tcp_poll() gets this, I have to double check.
>
> It gets the info from sk->sk_hutdown & RCV_SHUTDOWN
>
> tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and
> sock_set_flag(sk, SOCK_DONE);
>
> This seems to suggest tcp_fin() could call sk->sk_data_ready() so that
> we do not have to test for this unlikely condition in tcp_data_ready()

When a thread is subsequently then woken up due to sk_data_ready(),
and it calls tcp_stream_is_readable() but we had lowat > 1 set, is
there a chance of that thread then thinking that the stream is not
readable, despite SOCK_DONE being set? This is assuming that the check
is not added to the refactored logic.

Note that on a related note if the tcp memory pressure check (for
system-wide pressure) is added just to the original code in
tcp_data_ready() but not added to tcp_stream_is_readable() we had this
kind of issue (sk_data_ready() was called but tcp_stream_is_readable()
returned false).

-Arjun

-Arjun
Arjun Roy Feb. 13, 2021, 5:21 p.m. UTC | #5
On Sat, Feb 13, 2021 at 9:10 AM Arjun Roy <arjunroy@google.com> wrote:
>
> On Sat, Feb 13, 2021 at 12:05 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Sat, Feb 13, 2021 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > On Sat, Feb 13, 2021 at 1:30 AM Wei Wang <weiwan@google.com> wrote:
> > > >
> > >
> > > > >  void tcp_data_ready(struct sock *sk)
> > > > >  {
> > > > > -       const struct tcp_sock *tp = tcp_sk(sk);
> > > > > -       int avail = tp->rcv_nxt - tp->copied_seq;
> > > > > -
> > > > > -       if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
> > > > > -           !sock_flag(sk, SOCK_DONE) &&
> > > >
> > > > Seems "!sock_flag(sk, SOCK_DONE)" is not checked in
> > > > tcp_epollin_read(). Does it matter?
> > > >
> > >
> > >
> > > Yes, probably, good catch.
> > >
> > > Not sure where tcp_poll() gets this, I have to double check.
> >
> > It gets the info from sk->sk_hutdown & RCV_SHUTDOWN
> >
> > tcp_find() sets both sk->sk_shutdown |= RCV_SHUTDOWN and
> > sock_set_flag(sk, SOCK_DONE);
> >
> > This seems to suggest tcp_fin() could call sk->sk_data_ready() so that
> > we do not have to test for this unlikely condition in tcp_data_ready()
>
> When a thread is subsequently then woken up due to sk_data_ready(),
> and it calls tcp_stream_is_readable() but we had lowat > 1 set, is
> there a chance of that thread then thinking that the stream is not
> readable, despite SOCK_DONE being set? This is assuming that the check
> is not added to the refactored logic.
>
> Note that on a related note if the tcp memory pressure check (for
> system-wide pressure) is added just to the original code in
> tcp_data_ready() but not added to tcp_stream_is_readable() we had this
> kind of issue (sk_data_ready() was called but tcp_stream_is_readable()
> returned false).
>

Disregard,  I just saw your followup patch. So I guess it's fine.

-Arjun
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 244208f6f6c2ace87920b633e469421f557427a6..484eb2362645fd478f59b26b42457ecf4510eb14 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1442,6 +1442,18 @@  static inline bool tcp_rmem_pressure(const struct sock *sk)
 	return atomic_read(&sk->sk_rmem_alloc) > threshold;
 }
 
+static inline bool tcp_epollin_ready(const struct sock *sk, int target)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+
+	if (avail <= 0)
+		return false;
+
+	return (avail >= target) || tcp_rmem_pressure(sk) ||
+	       (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss);
+}
+
 extern void tcp_openreq_init_rwin(struct request_sock *req,
 				  const struct sock *sk_listener,
 				  const struct dst_entry *dst);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9896ca10bb340924b779cb6a7606d57fdd5c3357..7a6b58ae408d1fb1e5536ccfed8215be123f3b57 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -481,19 +481,11 @@  static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 	}
 }
 
-static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
-					  int target, struct sock *sk)
+static bool tcp_stream_is_readable(struct sock *sk, int target)
 {
-	int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+	if (tcp_epollin_ready(sk, target))
+		return true;
 
-	if (avail > 0) {
-		if (avail >= target)
-			return true;
-		if (tcp_rmem_pressure(sk))
-			return true;
-		if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
-			return true;
-	}
 	if (sk->sk_prot->stream_memory_read)
 		return sk->sk_prot->stream_memory_read(sk);
 	return false;
@@ -568,7 +560,7 @@  __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		if (tcp_stream_is_readable(tp, target, sk))
+		if (tcp_stream_is_readable(sk, target))
 			mask |= EPOLLIN | EPOLLRDNORM;
 
 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8f8f98159531e5d1c80660972148986f6acd20a..e32a7056cb7640c67ef2d6a4d9484684d2602fcd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4924,15 +4924,8 @@  int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 
 void tcp_data_ready(struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-	int avail = tp->rcv_nxt - tp->copied_seq;
-
-	if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
-	    !sock_flag(sk, SOCK_DONE) &&
-	    tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
-		return;
-
-	sk->sk_data_ready(sk);
+	if (tcp_epollin_ready(sk, sk->sk_rcvlowat))
+		sk->sk_data_ready(sk);
 }
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)