@@ -1368,7 +1368,9 @@ static inline bool is_cow_mapping(vm_flags_t flags)
#endif
void folio_put_unpin(struct folio *folio, unsigned int flags);
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags, unsigned int refs);
void page_put_unpin(struct page *page, unsigned int flags);
+void page_put_unpin_sub(struct page *page, unsigned int flags, unsigned int refs);
/*
* The identification function is mainly used by the buddy allocator for
@@ -213,6 +213,31 @@ void page_put_unpin(struct page *page, unsigned int flags)
}
EXPORT_SYMBOL_GPL(page_put_unpin);
+/**
+ * folio_put_unpin_sub - Drop multiple refs/pins on a folio as appropriate
+ * @folio: The folio to release
+ * @flags: gup flags indicating the mode of release (FOLL_*)
+ * @refs: Number of refs/pins to drop
+ *
+ * Drop @refs refs from the folio if FOLL_GET is set or @refs pins if FOLL_PIN
+ * is set; otherwise the folio is left unaltered.
+ */
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags,
+ unsigned int refs)
+{
+ if (flags & (FOLL_GET | FOLL_PIN))
+ gup_put_folio(folio, refs, flags);
+}
+EXPORT_SYMBOL_GPL(folio_put_unpin_sub);
+
+/* As folio_put_unpin_sub(), but takes a page and acts on page_folio(@page). */
+void page_put_unpin_sub(struct page *page, unsigned int flags,
+ unsigned int refs)
+{
+ folio_put_unpin_sub(page_folio(page), flags, refs);
+}
+EXPORT_SYMBOL_GPL(page_put_unpin_sub);
+
/**
* try_grab_page() - elevate a page's refcount by a flag-dependent amount
* @page: pointer to page to be grabbed
@@ -614,6 +614,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, struct iov_iter *from,
size_t length)
{
+ unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
int frag;
if (msg && msg->msg_ubuf && msg->sg_from_iter)
@@ -622,7 +623,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
- struct page *pages[MAX_SKB_FRAGS];
+ struct page *pages[MAX_SKB_FRAGS], **ppages = pages;
struct page *last_head = NULL;
size_t start;
ssize_t copied;
@@ -632,9 +633,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
- copied = iov_iter_get_pages(from, pages, length,
- MAX_SKB_FRAGS - frag, &start,
- FOLL_SOURCE_BUF);
+ copied = iov_iter_extract_pages(from, &ppages, length,
+ MAX_SKB_FRAGS - frag,
+ FOLL_SOURCE_BUF, &start);
if (copied < 0)
return -EFAULT;
@@ -662,12 +663,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
if (head == skb_frag_page(last) &&
+ cleanup_mode == skb_frag_cleanup(last) &&
start == skb_frag_off(last) + skb_frag_size(last)) {
skb_frag_size_add(last, size);
/* We combined this page, we need to release
- * a reference. Since compound pages refcount
- * is shared among many pages, batch the refcount
- * adjustments to limit false sharing.
+ * a reference or a pin. Since compound pages
+ * refcount is shared among many pages, batch
+ * the refcount adjustments to limit false
+ * sharing.
*/
last_head = head;
refs++;
@@ -675,14 +678,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
}
}
if (refs) {
- page_ref_sub(last_head, refs);
+ page_put_unpin_sub(last_head, cleanup_mode, refs);
refs = 0;
}
skb_fill_page_desc_noacc(skb, frag++, head, start, size,
- FOLL_GET);
+ cleanup_mode);
}
if (refs)
- page_ref_sub(last_head, refs);
+ page_put_unpin_sub(last_head, cleanup_mode, refs);
}
return 0;
}
Make __zerocopy_sg_from_iter() call iov_iter_extract_pages() to get pages that have been ref'd, pinned or left alone as appropriate. As this is only used for source buffers, pinning isn't an option, but leaving the pages without a ref is. The way __zerocopy_sg_from_iter() merges fragments is also altered, so that fragments are only merged if their cleanup modes also match. A helper, folio_put_unpin_sub(), and a page-based wrapper, page_put_unpin_sub(), are added to allow multiple refs or pins to be dropped in a single call. Signed-off-by: David Howells <dhowells@redhat.com> cc: "David S. Miller" <davem@davemloft.net> cc: Eric Dumazet <edumazet@google.com> cc: Jakub Kicinski <kuba@kernel.org> cc: Paolo Abeni <pabeni@redhat.com> cc: netdev@vger.kernel.org --- include/linux/mm.h | 2 ++ mm/gup.c | 25 +++++++++++++++++++++++++ net/core/datagram.c | 23 +++++++++++++---------- 3 files changed, 40 insertions(+), 10 deletions(-)