
[v2,1/9] Revert "tcp: Use per-vma locking for receive zerocopy"

Message ID: 20230711202047.3818697-2-willy@infradead.org (mailing list archive)
State: New, archived
Series: Avoid the mmap lock for fault-around

Commit Message

Matthew Wilcox July 11, 2023, 8:20 p.m. UTC
This reverts commit 7a7f094635349a7d0314364ad50bdeb770b6df4f.
---
 MAINTAINERS            |  1 -
 include/linux/net_mm.h | 17 ----------------
 include/net/tcp.h      |  1 -
 mm/memory.c            |  7 +++----
 net/ipv4/tcp.c         | 45 ++++++++----------------------------------
 5 files changed, 11 insertions(+), 60 deletions(-)
 delete mode 100644 include/linux/net_mm.h

Comments

Suren Baghdasaryan July 14, 2023, 3:02 a.m. UTC | #1
On Tue, Jul 11, 2023 at 1:21 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
>
> This reverts commit 7a7f094635349a7d0314364ad50bdeb770b6df4f.

nit: some explanation and SOB would be nice.

Reviewed-by: Suren Baghdasaryan <surenb@google.com>


> [... quoted patch body snipped; identical to the diff below ...]
Matthew Wilcox July 14, 2023, 3:34 a.m. UTC | #2
On Thu, Jul 13, 2023 at 08:02:12PM -0700, Suren Baghdasaryan wrote:
> On Tue, Jul 11, 2023 at 1:21 PM Matthew Wilcox (Oracle)
> <willy@infradead.org> wrote:
> >
> > This reverts commit 7a7f094635349a7d0314364ad50bdeb770b6df4f.
> 
> nit: some explanation and SOB would be nice.

Well, it can't be actually applied.  What needs to happen is that the
networking people need to drop the commit from their tree.  Some review
from the networking people would be helpful to be sure that I didn't
break anything in my reworking of this patch to apply after my patches.
Jann Horn July 24, 2023, 2:49 p.m. UTC | #3
On Fri, Jul 14, 2023 at 5:34 AM Matthew Wilcox <willy@infradead.org> wrote:
> On Thu, Jul 13, 2023 at 08:02:12PM -0700, Suren Baghdasaryan wrote:
> > On Tue, Jul 11, 2023 at 1:21 PM Matthew Wilcox (Oracle)
> > <willy@infradead.org> wrote:
> > >
> > > This reverts commit 7a7f094635349a7d0314364ad50bdeb770b6df4f.
> >
> > nit: some explanation and SOB would be nice.
>
> Well, it can't be actually applied.  What needs to happen is that the
> networking people need to drop the commit from their tree.  Some review
> from the networking people would be helpful to be sure that I didn't
> break anything in my reworking of this patch to apply after my patches.

Are you saying you want them to revert it before it reaches mainline?
That commit landed in v6.5-rc1.
Matthew Wilcox July 24, 2023, 3:06 p.m. UTC | #4
On Mon, Jul 24, 2023 at 04:49:16PM +0200, Jann Horn wrote:
> On Fri, Jul 14, 2023 at 5:34 AM Matthew Wilcox <willy@infradead.org> wrote:
> > On Thu, Jul 13, 2023 at 08:02:12PM -0700, Suren Baghdasaryan wrote:
> > > On Tue, Jul 11, 2023 at 1:21 PM Matthew Wilcox (Oracle)
> > > <willy@infradead.org> wrote:
> > > >
> > > > This reverts commit 7a7f094635349a7d0314364ad50bdeb770b6df4f.
> > >
> > > nit: some explanation and SOB would be nice.
> >
> > Well, it can't be actually applied.  What needs to happen is that the
> > networking people need to drop the commit from their tree.  Some review
> > from the networking people would be helpful to be sure that I didn't
> > break anything in my reworking of this patch to apply after my patches.
> 
> Are you saying you want them to revert it before it reaches mainline?
> That commit landed in v6.5-rc1.

... what?  It was posted on June 16th.  How does it end up in rc1 on
July 9th?  6.4 was June 25th.  9 days is long enough for something
that's not an urgent fix to land in rc1?  Networking doesn't close
development at rc5/6 like most subsystem trees?
Jakub Kicinski July 24, 2023, 9:42 p.m. UTC | #5
On Mon, 24 Jul 2023 16:06:00 +0100 Matthew Wilcox wrote:
> > Are you saying you want them to revert it before it reaches mainline?
> > That commit landed in v6.5-rc1.  
> 
> ... what?  It was posted on June 16th.  How does it end up in rc1 on
> July 9th?  6.4 was June 25th.  9 days is long enough for something
> that's not an urgent fix to land in rc1?  Networking doesn't close
> development at rc5/6 like most subsystem trees?

We don't, and yeah, this one was a bit risky. We close for the merge
window (the two weeks); we could definitely push back on risky changes
starting a week or two before the window... but we don't know how long
the release will last :( If we stop taking large changes at rc6 and the
release goes until rc8, that's 5 out of 11 weeks of the cycle when we
can't apply substantial patches. That's way too long. The weeks after
the merge window are already super stressful; if we shut down for longer
it'll only get worse. I'm typing all this because I was hoping we could
bring up making the release schedule even more predictable with Linus,
and I'm curious if others feel the same way.

On the matter at hand - I thought the patches were just conflicting
with your upcoming work. Are they already broken in v6.5? No problem
with queuing the revert for v6.5 here if they are.

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 18cd0ce2c7d2..00047800cff1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14816,7 +14816,6 @@ NETWORKING [TCP]
 M:	Eric Dumazet <edumazet@google.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	include/linux/net_mm.h
 F:	include/linux/tcp.h
 F:	include/net/tcp.h
 F:	include/trace/events/tcp.h
diff --git a/include/linux/net_mm.h b/include/linux/net_mm.h
deleted file mode 100644
index b298998bd5a0..000000000000
--- a/include/linux/net_mm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifdef CONFIG_MMU
-
-#ifdef CONFIG_INET
-extern const struct vm_operations_struct tcp_vm_ops;
-static inline bool vma_is_tcp(const struct vm_area_struct *vma)
-{
-	return vma->vm_ops == &tcp_vm_ops;
-}
-#else
-static inline bool vma_is_tcp(const struct vm_area_struct *vma)
-{
-	return false;
-}
-#endif /* CONFIG_INET*/
-
-#endif /* CONFIG_MMU */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 226bce6d1e8c..95e4507febed 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -45,7 +45,6 @@
 #include <linux/memcontrol.h>
 #include <linux/bpf-cgroup.h>
 #include <linux/siphash.h>
-#include <linux/net_mm.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
 
diff --git a/mm/memory.c b/mm/memory.c
index 0a265ac6246e..2c7967632866 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -77,7 +77,6 @@
 #include <linux/ptrace.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/sysctl.h>
-#include <linux/net_mm.h>
 
 #include <trace/events/kmem.h>
 
@@ -5419,12 +5418,12 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 	if (!vma)
 		goto inval;
 
-	/* Only anonymous and tcp vmas are supported for now */
-	if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
+	/* Only anonymous vmas are supported for now */
+	if (!vma_is_anonymous(vma))
 		goto inval;
 
 	/* find_mergeable_anon_vma uses adjacent vmas which are not locked */
-	if (!vma->anon_vma && !vma_is_tcp(vma))
+	if (!vma->anon_vma)
 		goto inval;
 
 	if (!vma_start_read(vma))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03e08745308..1542de3f66f7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1739,7 +1739,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb,
 }
 
 #ifdef CONFIG_MMU
-const struct vm_operations_struct tcp_vm_ops = {
+static const struct vm_operations_struct tcp_vm_ops = {
 };
 
 int tcp_mmap(struct file *file, struct socket *sock,
@@ -2038,34 +2038,6 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
 	}
 }
 
-static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
-					   unsigned long address,
-					   bool *mmap_locked)
-{
-	struct vm_area_struct *vma = NULL;
-
-#ifdef CONFIG_PER_VMA_LOCK
-	vma = lock_vma_under_rcu(mm, address);
-#endif
-	if (vma) {
-		if (!vma_is_tcp(vma)) {
-			vma_end_read(vma);
-			return NULL;
-		}
-		*mmap_locked = false;
-		return vma;
-	}
-
-	mmap_read_lock(mm);
-	vma = vma_lookup(mm, address);
-	if (!vma || !vma_is_tcp(vma)) {
-		mmap_read_unlock(mm);
-		return NULL;
-	}
-	*mmap_locked = true;
-	return vma;
-}
-
 #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc,
@@ -2083,7 +2055,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	u32 seq = tp->copied_seq;
 	u32 total_bytes_to_map;
 	int inq = tcp_inq(sk);
-	bool mmap_locked;
 	int ret;
 
 	zc->copybuf_len = 0;
@@ -2108,10 +2079,13 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		return 0;
 	}
 
-	vma = find_tcp_vma(current->mm, address, &mmap_locked);
-	if (!vma)
-		return -EINVAL;
+	mmap_read_lock(current->mm);
 
+	vma = vma_lookup(current->mm, address);
+	if (!vma || vma->vm_ops != &tcp_vm_ops) {
+		mmap_read_unlock(current->mm);
+		return -EINVAL;
+	}
 	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
 	avail_len = min_t(u32, vma_len, inq);
 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
@@ -2185,10 +2159,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
 						   zc, total_bytes_to_map);
 	}
 out:
-	if (mmap_locked)
-		mmap_read_unlock(current->mm);
-	else
-		vma_end_read(vma);
+	mmap_read_unlock(current->mm);
 	/* Try to copy straggler data. */
 	if (!ret)
 		copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);
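
A note on the locking pattern this revert removes: find_tcp_vma() was a
two-tier lookup. It first tried the per-VMA read lock via
lock_vma_under_rcu(), avoiding the mmap lock entirely; fell back to
mmap_read_lock() plus vma_lookup() if that failed; and reported which
lock it took through the mmap_locked out-parameter so the caller could
drop the right one at its out: label. Below is a minimal userspace
sketch of that pattern using POSIX rwlocks. It is an analogy only, not
kernel code; the names (find_obj, the_obj, global_lock) are invented
for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Coarse lock over the whole "address space" (stands in for mmap_lock). */
static pthread_rwlock_t global_lock = PTHREAD_RWLOCK_INITIALIZER;

struct obj {
	pthread_rwlock_t lock;	/* fine-grained lock (stands in for the per-VMA lock) */
	int data;
};

static struct obj the_obj = { PTHREAD_RWLOCK_INITIALIZER, 42 };

/* Mirrors the shape of find_tcp_vma(): on return, *coarse_locked tells
 * the caller which lock it holds, like the mmap_locked out-parameter. */
static struct obj *find_obj(bool *coarse_locked)
{
	/* Fast path: take only the per-object read lock. */
	if (pthread_rwlock_tryrdlock(&the_obj.lock) == 0) {
		*coarse_locked = false;
		return &the_obj;
	}

	/* Slow path: fall back to the coarse lock (the kernel code
	 * re-did the lookup with vma_lookup() at this point). */
	pthread_rwlock_rdlock(&global_lock);
	*coarse_locked = true;
	return &the_obj;
}

int main(void)
{
	bool coarse_locked;
	struct obj *o = find_obj(&coarse_locked);

	printf("data=%d via %s lock\n", o->data,
	       coarse_locked ? "coarse" : "per-object");

	/* Drop whichever lock find_obj() took; this is the bookkeeping
	 * the revert deletes from tcp_zerocopy_receive(). */
	if (coarse_locked)
		pthread_rwlock_unlock(&global_lock);
	else
		pthread_rwlock_unlock(&o->lock);
	return 0;
}

With the revert applied, only the slow path remains, which is why the
mmap_locked flag and the conditional unlock disappear from
tcp_zerocopy_receive().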