diff mbox series

[v6,34/34] net: [RFC][WIP] Make __zerocopy_sg_from_iter() correctly pin or leave pages unref'd

Message ID 167391073019.2311931.11127613443740355536.stgit@warthog.procyon.org.uk (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series iov_iter: Improve page extraction (ref, pin or just list) | expand

Checks

Context Check Description
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16

Commit Message

David Howells Jan. 16, 2023, 11:12 p.m. UTC
Make __zerocopy_sg_from_iter() call iov_iter_extract_pages() to get pages
that have been ref'd, pinned or left alone as appropriate.  As this is only
used for source buffers, pinning isn't an option, but being unref'd is.

The way __zerocopy_sg_from_iter() merges fragments is also altered, so
that fragments are only merged if their cleanup modes match as well.

An extra helper, folio_put_unpin_sub(), and its page-based wrapper,
page_put_unpin_sub(), are added to allow multiple refs to be put/unpinned.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: netdev@vger.kernel.org
---

 include/linux/mm.h  |    2 ++
 mm/gup.c            |   25 +++++++++++++++++++++++++
 net/core/datagram.c |   23 +++++++++++++----------
 3 files changed, 40 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f14edb192394..e3923b89c75e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1368,7 +1368,9 @@  static inline bool is_cow_mapping(vm_flags_t flags)
 #endif
 
 void folio_put_unpin(struct folio *folio, unsigned int flags);
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags, unsigned int refs);
 void page_put_unpin(struct page *page, unsigned int flags);
+void page_put_unpin_sub(struct page *page, unsigned int flags, unsigned int refs);
 
 /*
  * The identification function is mainly used by the buddy allocator for
diff --git a/mm/gup.c b/mm/gup.c
index 3ee4b4c7e0cb..49dd27ba6c13 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -213,6 +213,31 @@  void page_put_unpin(struct page *page, unsigned int flags)
 }
 EXPORT_SYMBOL_GPL(page_put_unpin);
 
+/**
+ * folio_put_unpin_sub - Drop multiple refs/pins on a folio as appropriate
+ * @folio: The folio to release
+ * @flags: gup flags indicating the mode of release (FOLL_*)
+ * @refs: Number of refs/pins to drop
+ *
+ * Release @refs refs or pins on a folio according to @flags.  If FOLL_GET or
+ * FOLL_PIN is set, the release is passed to gup_put_folio() along with @refs
+ * and @flags; if neither is set, the folio is left unaltered.
+ */
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags,
+			 unsigned int refs)
+{
+	if (flags & (FOLL_GET | FOLL_PIN))
+		gup_put_folio(folio, refs, flags);
+}
+EXPORT_SYMBOL_GPL(folio_put_unpin_sub);
+
+void page_put_unpin_sub(struct page *page, unsigned int flags,
+			unsigned int refs)
+{
+	folio_put_unpin_sub(page_folio(page), flags, refs);
+}
+EXPORT_SYMBOL_GPL(page_put_unpin_sub);
+
 /**
  * try_grab_page() - elevate a page's refcount by a flag-dependent amount
  * @page:    pointer to page to be grabbed
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 122bfb144d32..63ea1f8817e0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -614,6 +614,7 @@  int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 			    struct sk_buff *skb, struct iov_iter *from,
 			    size_t length)
 {
+	unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
 	int frag;
 
 	if (msg && msg->msg_ubuf && msg->sg_from_iter)
@@ -622,7 +623,7 @@  int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 	frag = skb_shinfo(skb)->nr_frags;
 
 	while (length && iov_iter_count(from)) {
-		struct page *pages[MAX_SKB_FRAGS];
+		struct page *pages[MAX_SKB_FRAGS], **ppages = pages;
 		struct page *last_head = NULL;
 		size_t start;
 		ssize_t copied;
@@ -632,9 +633,9 @@  int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 		if (frag == MAX_SKB_FRAGS)
 			return -EMSGSIZE;
 
-		copied = iov_iter_get_pages(from, pages, length,
-					    MAX_SKB_FRAGS - frag, &start,
-					    FOLL_SOURCE_BUF);
+		copied = iov_iter_extract_pages(from, &ppages, length,
+						MAX_SKB_FRAGS - frag,
+						FOLL_SOURCE_BUF, &start);
 		if (copied < 0)
 			return -EFAULT;
 
@@ -662,12 +663,14 @@  int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 				skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
 
 				if (head == skb_frag_page(last) &&
+				    cleanup_mode == skb_frag_cleanup(last) &&
 				    start == skb_frag_off(last) + skb_frag_size(last)) {
 					skb_frag_size_add(last, size);
 					/* We combined this page, we need to release
-					 * a reference. Since compound pages refcount
-					 * is shared among many pages, batch the refcount
-					 * adjustments to limit false sharing.
+					 * a reference or a pin.  Since compound pages
+					 * refcount is shared among many pages, batch
+					 * the refcount adjustments to limit false
+					 * sharing.
 					 */
 					last_head = head;
 					refs++;
@@ -675,14 +678,14 @@  int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 				}
 			}
 			if (refs) {
-				page_ref_sub(last_head, refs);
+				page_put_unpin_sub(last_head, cleanup_mode, refs);
 				refs = 0;
 			}
 			skb_fill_page_desc_noacc(skb, frag++, head, start, size,
-						 FOLL_GET);
+						 cleanup_mode);
 		}
 		if (refs)
-			page_ref_sub(last_head, refs);
+			page_put_unpin_sub(last_head, cleanup_mode, refs);
 	}
 	return 0;
 }