diff mbox

->steal semantics on anon pages

Message ID 20150401125056.GA5297@infradead.org (mailing list archive)
State New, archived
Headers show

Commit Message

Christoph Hellwig April 1, 2015, 12:50 p.m. UTC
I've started looking into resurrecting splice F_MOVE support, and it
seems ->steal for anon pages is completely bogus at the moment:

 - the page count check is incorrect
 - it doesn't isolate the mapping from the lru
 - it sets the PIPE_BUF_FLAG_LRU flag, which doesn't get the file
   added to the file lru

Currently only fuse calls ->steal, but I'm not sure it could work on
a vmspliced buffer at all.

Below is a patch that attempts to fix / paper over ->steal, and the
second is the unfinished F_MOVE resurrection patch which shows
what additional workarounds we need for ->steal from anon pages.
From 94958673ed6d0add7f5d95cc17fb0c9fa8f58c03 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 1 Apr 2015 14:28:50 +0200
Subject: fix ->steal for anon pages

---
 fs/splice.c          | 20 ++++++++++++++++++--
 include/linux/swap.h |  1 +
 mm/internal.h        |  1 -
 3 files changed, 19 insertions(+), 3 deletions(-)
diff mbox

Patch

diff --git a/fs/splice.c b/fs/splice.c
index 41cbb16..36aa4a9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -145,11 +145,27 @@  const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 				    struct pipe_buffer *buf)
 {
+	struct page *page = buf->page;
+
 	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
 		return 1;
 
-	buf->flags |= PIPE_BUF_FLAG_LRU;
-	return generic_pipe_buf_steal(pipe, buf);
+	/*
+	 * We should have three references to the page: gup, lru, and
+	 * one for being mapped into page tables.
+	 */
+	if (page_count(page) != 3)
+		return 1;
+
+	lock_page(page);
+
+	if (!isolate_lru_page(page)) {
+		ClearPageActive(page);
+		ClearPageUnevictable(page);
+		page_cache_release(page);
+	}
+		
+	return 0;
 }
 
 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7067eca..a3742f3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -318,6 +318,7 @@  extern void lru_cache_add_active_or_unevictable(struct page *page,
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
+extern int isolate_lru_page(struct page *page);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
diff --git a/mm/internal.h b/mm/internal.h
index a96da5b..48c5731 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -96,7 +96,6 @@  extern unsigned long highest_memmap_pfn;
 /*
  * in mm/vmscan.c:
  */
-extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
 extern bool zone_reclaimable(struct zone *zone);