diff mbox series

[v10,2/3] fs: introduce super_drop_pagecache()

Message ID 1676645312-13-3-git-send-email-ruansy.fnst@fujitsu.com (mailing list archive)
State New, archived
Headers show
Series mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind | expand

Commit Message

Shiyang Ruan Feb. 17, 2023, 2:48 p.m. UTC
xfs_notify_failure.c requires a method to invalidate all dax mappings.
drop_pagecache_sb() can do this but it is a static function and only
build with CONFIG_SYSCTL.  Now, move its implementation into super.c and
call it super_drop_pagecache().  Use its second argument as invalidator
so that we can choose which invalidate method to use.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 fs/drop_caches.c        | 29 +--------------------------
 fs/super.c              | 43 +++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h      |  2 ++
 include/linux/pagemap.h |  1 +
 mm/truncate.c           | 20 +++++++++++++++++--
 5 files changed, 65 insertions(+), 30 deletions(-)

Comments

Matthew Wilcox (Oracle) Feb. 17, 2023, 4:14 p.m. UTC | #1
On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
> -		invalidate_mapping_pages(inode->i_mapping, 0, -1);
> -		iput(toput_inode);
> -		toput_inode = inode;
> -
> -		cond_resched();
> -		spin_lock(&sb->s_inode_list_lock);
> -	}
> -	spin_unlock(&sb->s_inode_list_lock);
> -	iput(toput_inode);
> +	super_drop_pagecache(sb, invalidate_inode_pages);

I thought I explained last time that you can do this with
invalidate_mapping_pages() / invalidate_inode_pages2_range() ?
Then you don't need to introduce invalidate_inode_pages().

> +void super_drop_pagecache(struct super_block *sb,
> +	int (*invalidator)(struct address_space *))

void super_drop_pagecache(struct super_block *sb,
		int (*invalidate)(struct address_space *, pgoff_t, pgoff_t))

> +		invalidator(inode->i_mapping);

		invalidate(inode->i_mapping, 0, -1)

... then all the changes to mm/truncate.c and filemap.h go away.
Shiyang Ruan Feb. 18, 2023, 1:16 a.m. UTC | #2
在 2023/2/18 0:14, Matthew Wilcox 写道:
> On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
>> -		invalidate_mapping_pages(inode->i_mapping, 0, -1);
>> -		iput(toput_inode);
>> -		toput_inode = inode;
>> -
>> -		cond_resched();
>> -		spin_lock(&sb->s_inode_list_lock);
>> -	}
>> -	spin_unlock(&sb->s_inode_list_lock);
>> -	iput(toput_inode);
>> +	super_drop_pagecache(sb, invalidate_inode_pages);
> 
> I thought I explained last time that you can do this with
> invalidate_mapping_pages() / invalidate_inode_pages2_range() ?
> Then you don't need to introduce invalidate_inode_pages().
> 
>> +void super_drop_pagecache(struct super_block *sb,
>> +	int (*invalidator)(struct address_space *))
> 
> void super_drop_pagecache(struct super_block *sb,
> 		int (*invalidate)(struct address_space *, pgoff_t, pgoff_t))
> 
>> +		invalidator(inode->i_mapping);
> 
> 		invalidate(inode->i_mapping, 0, -1)
> 
> ... then all the changes to mm/truncate.c and filemap.h go away.

Yes, I tried as you suggested, but I found that they don't have same 
type of return value.

int invalidate_inode_pages2_range(struct address_space *mapping,
				  pgoff_t start, pgoff_t end);

unsigned long invalidate_mapping_pages(struct address_space *mapping,
		pgoff_t start, pgoff_t end);


--
Thanks,
Ruan.
Matthew Wilcox (Oracle) Feb. 18, 2023, 6:27 p.m. UTC | #3
On Sat, Feb 18, 2023 at 09:16:43AM +0800, Shiyang Ruan wrote:
> 在 2023/2/18 0:14, Matthew Wilcox 写道:
> > On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
> > > -		invalidate_mapping_pages(inode->i_mapping, 0, -1);
> > > -		iput(toput_inode);
> > > -		toput_inode = inode;
> > > -
> > > -		cond_resched();
> > > -		spin_lock(&sb->s_inode_list_lock);
> > > -	}
> > > -	spin_unlock(&sb->s_inode_list_lock);
> > > -	iput(toput_inode);
> > > +	super_drop_pagecache(sb, invalidate_inode_pages);
> > 
> > I thought I explained last time that you can do this with
> > invalidate_mapping_pages() / invalidate_inode_pages2_range() ?
> > Then you don't need to introduce invalidate_inode_pages().
> > 
> > > +void super_drop_pagecache(struct super_block *sb,
> > > +	int (*invalidator)(struct address_space *))
> > 
> > void super_drop_pagecache(struct super_block *sb,
> > 		int (*invalidate)(struct address_space *, pgoff_t, pgoff_t))
> > 
> > > +		invalidator(inode->i_mapping);
> > 
> > 		invalidate(inode->i_mapping, 0, -1)
> > 
> > ... then all the changes to mm/truncate.c and filemap.h go away.
> 
> Yes, I tried as you suggested, but I found that they don't have same type of
> return value.
> 
> int invalidate_inode_pages2_range(struct address_space *mapping,
> 				  pgoff_t start, pgoff_t end);
> 
> unsigned long invalidate_mapping_pages(struct address_space *mapping,
> 		pgoff_t start, pgoff_t end);

Oh, that's annoying.  Particularly annoying is that the return value
for invalidate_mapping_pages() is used by fs/inode.c to account for
the number of pages invalidate, and the return value for
invalidate_inode_pages2_range() is used by several filesystems
to know whether an error occurred.

Hm.  Shouldn't you be checking for an error from
invalidate_inode_pages2_range()?  Seems like it can return -EBUSY for
DAX entries.

With that in mind, the wrapper you actually want to exist is

static int invalidate_inode_pages_range(struct address_space *mapping,
				pgoff_t start, pgoff_t end)
{
	invalidate_mapping_pages(mapping, start, end);
	return 0;
}

Right?
Shiyang Ruan Feb. 20, 2023, 9:39 a.m. UTC | #4
在 2023/2/19 2:27, Matthew Wilcox 写道:
> On Sat, Feb 18, 2023 at 09:16:43AM +0800, Shiyang Ruan wrote:
>> 在 2023/2/18 0:14, Matthew Wilcox 写道:
>>> On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
>>>> -		invalidate_mapping_pages(inode->i_mapping, 0, -1);
>>>> -		iput(toput_inode);
>>>> -		toput_inode = inode;
>>>> -
>>>> -		cond_resched();
>>>> -		spin_lock(&sb->s_inode_list_lock);
>>>> -	}
>>>> -	spin_unlock(&sb->s_inode_list_lock);
>>>> -	iput(toput_inode);
>>>> +	super_drop_pagecache(sb, invalidate_inode_pages);
>>>
>>> I thought I explained last time that you can do this with
>>> invalidate_mapping_pages() / invalidate_inode_pages2_range() ?
>>> Then you don't need to introduce invalidate_inode_pages().
>>>
>>>> +void super_drop_pagecache(struct super_block *sb,
>>>> +	int (*invalidator)(struct address_space *))
>>>
>>> void super_drop_pagecache(struct super_block *sb,
>>> 		int (*invalidate)(struct address_space *, pgoff_t, pgoff_t))
>>>
>>>> +		invalidator(inode->i_mapping);
>>>
>>> 		invalidate(inode->i_mapping, 0, -1)
>>>
>>> ... then all the changes to mm/truncate.c and filemap.h go away.
>>
>> Yes, I tried as you suggested, but I found that they don't have same type of
>> return value.
>>
>> int invalidate_inode_pages2_range(struct address_space *mapping,
>> 				  pgoff_t start, pgoff_t end);
>>
>> unsigned long invalidate_mapping_pages(struct address_space *mapping,
>> 		pgoff_t start, pgoff_t end);
> 
> Oh, that's annoying.  Particularly annoying is that the return value
> for invalidate_mapping_pages() is used by fs/inode.c to account for
> the number of pages invalidate, and the return value for
> invalidate_inode_pages2_range() is used by several filesystems
> to know whether an error occurred.
> 
> Hm.  Shouldn't you be checking for an error from
> invalidate_inode_pages2_range()?  Seems like it can return -EBUSY for
> DAX entries.
> 
> With that in mind, the wrapper you actually want to exist is
> 
> static int invalidate_inode_pages_range(struct address_space *mapping,
> 				pgoff_t start, pgoff_t end)
> {
> 	invalidate_mapping_pages(mapping, start, end);
> 	return 0;
> }
> 
> Right?

So, I should introduce this wrapper in fs/xfs/xfs_notify_failure.c 
because it is the only one who calls this wrapper.  Ok, got it!


--
Thanks,
Ruan.
Shiyang Ruan Feb. 20, 2023, 9:45 a.m. UTC | #5
在 2023/2/20 17:39, Shiyang Ruan 写道:
> 
> 
> 在 2023/2/19 2:27, Matthew Wilcox 写道:
>> On Sat, Feb 18, 2023 at 09:16:43AM +0800, Shiyang Ruan wrote:
>>> 在 2023/2/18 0:14, Matthew Wilcox 写道:
>>>> On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
>>>>> -        invalidate_mapping_pages(inode->i_mapping, 0, -1);
>>>>> -        iput(toput_inode);
>>>>> -        toput_inode = inode;
>>>>> -
>>>>> -        cond_resched();
>>>>> -        spin_lock(&sb->s_inode_list_lock);
>>>>> -    }
>>>>> -    spin_unlock(&sb->s_inode_list_lock);
>>>>> -    iput(toput_inode);
>>>>> +    super_drop_pagecache(sb, invalidate_inode_pages);
>>>>
>>>> I thought I explained last time that you can do this with
>>>> invalidate_mapping_pages() / invalidate_inode_pages2_range() ?
>>>> Then you don't need to introduce invalidate_inode_pages().
>>>>
>>>>> +void super_drop_pagecache(struct super_block *sb,
>>>>> +    int (*invalidator)(struct address_space *))
>>>>
>>>> void super_drop_pagecache(struct super_block *sb,
>>>>         int (*invalidate)(struct address_space *, pgoff_t, pgoff_t))
>>>>
>>>>> +        invalidator(inode->i_mapping);
>>>>
>>>>         invalidate(inode->i_mapping, 0, -1)
>>>>
>>>> ... then all the changes to mm/truncate.c and filemap.h go away.
>>>
>>> Yes, I tried as you suggested, but I found that they don't have same 
>>> type of
>>> return value.
>>>
>>> int invalidate_inode_pages2_range(struct address_space *mapping,
>>>                   pgoff_t start, pgoff_t end);
>>>
>>> unsigned long invalidate_mapping_pages(struct address_space *mapping,
>>>         pgoff_t start, pgoff_t end);
>>
>> Oh, that's annoying.  Particularly annoying is that the return value
>> for invalidate_mapping_pages() is used by fs/inode.c to account for
>> the number of pages invalidate, and the return value for
>> invalidate_inode_pages2_range() is used by several filesystems
>> to know whether an error occurred.
>>
>> Hm.  Shouldn't you be checking for an error from
>> invalidate_inode_pages2_range()?  Seems like it can return -EBUSY for
>> DAX entries.
>>
>> With that in mind, the wrapper you actually want to exist is
>>
>> static int invalidate_inode_pages_range(struct address_space *mapping,
>>                 pgoff_t start, pgoff_t end)
>> {
>>     invalidate_mapping_pages(mapping, start, end);
>>     return 0;
>> }
>>
>> Right?
> 
> So, I should introduce this wrapper in fs/xfs/xfs_notify_failure.c 

Ahh... It's not fs/xfs/xfs_notify_failure.c, should be fs/drop_caches.c.

> because it is the only one who calls this wrapper.  Ok, got it!
> 
> 
> -- 
> Thanks,
> Ruan.
Dave Chinner Feb. 20, 2023, 9:25 p.m. UTC | #6
On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
> xfs_notify_failure.c requires a method to invalidate all dax mappings.
> drop_pagecache_sb() can do this but it is a static function and only
> build with CONFIG_SYSCTL.  Now, move its implementation into super.c and
> call it super_drop_pagecache().  Use its second argument as invalidator
> so that we can choose which invalidate method to use.
> 
> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>

I got no repsonse last time, so I'll just post a link to the
concerns I stated about this:

https://lore.kernel.org/linux-xfs/20230205215000.GT360264@dread.disaster.area/

-Dave.
Shiyang Ruan Feb. 21, 2023, 1:57 a.m. UTC | #7
在 2023/2/21 5:25, Dave Chinner 写道:
> On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
>> xfs_notify_failure.c requires a method to invalidate all dax mappings.
>> drop_pagecache_sb() can do this but it is a static function and only
>> build with CONFIG_SYSCTL.  Now, move its implementation into super.c and
>> call it super_drop_pagecache().  Use its second argument as invalidator
>> so that we can choose which invalidate method to use.
>>
>> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> 
> I got no repsonse last time, so I'll just post a link to the
> concerns I stated about this:
> 
> https://lore.kernel.org/linux-xfs/20230205215000.GT360264@dread.disaster.area/

Please see patch 3.  I changed the code: freeze the fs, then drop its 
caches.


--
Thanks,
Ruan.

> 
> -Dave.
Dave Chinner Feb. 26, 2023, 11:50 p.m. UTC | #8
On Tue, Feb 21, 2023 at 09:57:52AM +0800, Shiyang Ruan wrote:
> 
> 
> 在 2023/2/21 5:25, Dave Chinner 写道:
> > On Fri, Feb 17, 2023 at 02:48:31PM +0000, Shiyang Ruan wrote:
> > > xfs_notify_failure.c requires a method to invalidate all dax mappings.
> > > drop_pagecache_sb() can do this but it is a static function and only
> > > build with CONFIG_SYSCTL.  Now, move its implementation into super.c and
> > > call it super_drop_pagecache().  Use its second argument as invalidator
> > > so that we can choose which invalidate method to use.
> > > 
> > > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> > 
> > I got no repsonse last time, so I'll just post a link to the
> > concerns I stated about this:
> > 
> > https://lore.kernel.org/linux-xfs/20230205215000.GT360264@dread.disaster.area/
> 
> Please see patch 3.  I changed the code: freeze the fs, then drop its
> caches.

Neither the patch nor the cover letter have a changelog in them
that mention you changed anything. Please, at least, keep a change
log in the cover letter so that people know what has changed from
version to version without having to look at every patch and line of
code again.

-Dave.
diff mbox series

Patch

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index e619c31b6bd9..f88ce339b635 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -17,34 +17,7 @@  int sysctl_drop_caches;
 
 static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
-	struct inode *inode, *toput_inode = NULL;
-
-	spin_lock(&sb->s_inode_list_lock);
-	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		spin_lock(&inode->i_lock);
-		/*
-		 * We must skip inodes in unusual state. We may also skip
-		 * inodes without pages but we deliberately won't in case
-		 * we need to reschedule to avoid softlockups.
-		 */
-		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-		    (mapping_empty(inode->i_mapping) && !need_resched())) {
-			spin_unlock(&inode->i_lock);
-			continue;
-		}
-		__iget(inode);
-		spin_unlock(&inode->i_lock);
-		spin_unlock(&sb->s_inode_list_lock);
-
-		invalidate_mapping_pages(inode->i_mapping, 0, -1);
-		iput(toput_inode);
-		toput_inode = inode;
-
-		cond_resched();
-		spin_lock(&sb->s_inode_list_lock);
-	}
-	spin_unlock(&sb->s_inode_list_lock);
-	iput(toput_inode);
+	super_drop_pagecache(sb, invalidate_inode_pages);
 }
 
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
diff --git a/fs/super.c b/fs/super.c
index 12c08cb20405..a403243b5513 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -36,6 +36,7 @@ 
 #include <linux/lockdep.h>
 #include <linux/user_namespace.h>
 #include <linux/fs_context.h>
+#include <linux/pagemap.h>
 #include <uapi/linux/mount.h>
 #include "internal.h"
 
@@ -678,6 +679,48 @@  void drop_super_exclusive(struct super_block *sb)
 }
 EXPORT_SYMBOL(drop_super_exclusive);
 
+/**
+ *	super_drop_pagecache - drop all page caches of a filesystem
+ *	@sb: superblock to invalidate
+ *	@arg: invalidate method, such as invalidate_inode_pages(),
+ *	        invalidate_inode_pages2()
+ *
+ *	Scans the inodes of a filesystem, drop all page caches.
+ */
+void super_drop_pagecache(struct super_block *sb,
+	int (*invalidator)(struct address_space *))
+{
+	struct inode *inode, *toput_inode = NULL;
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		spin_lock(&inode->i_lock);
+		/*
+		 * We must skip inodes in unusual state. We may also skip
+		 * inodes without pages but we deliberately won't in case
+		 * we need to reschedule to avoid softlockups.
+		 */
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (mapping_empty(inode->i_mapping) && !need_resched())) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+		__iget(inode);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&sb->s_inode_list_lock);
+
+		invalidator(inode->i_mapping);
+		iput(toput_inode);
+		toput_inode = inode;
+
+		cond_resched();
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+	iput(toput_inode);
+}
+EXPORT_SYMBOL(super_drop_pagecache);
+
 static void __iterate_supers(void (*f)(struct super_block *))
 {
 	struct super_block *sb, *p = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c1769a2c5d70..fdcaa9bf85dd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3308,6 +3308,8 @@  extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void drop_super_exclusive(struct super_block *sb);
+void super_drop_pagecache(struct super_block *sb,
+			  int (*invalidator)(struct address_space *));
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
 extern void iterate_supers_type(struct file_system_type *,
 			        void (*)(struct super_block *, void *), void *);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 29e1f9e76eb6..d0a180268baa 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -27,6 +27,7 @@  static inline void invalidate_remote_inode(struct inode *inode)
 	    S_ISLNK(inode->i_mode))
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 }
+int invalidate_inode_pages(struct address_space *mapping);
 int invalidate_inode_pages2(struct address_space *mapping);
 int invalidate_inode_pages2_range(struct address_space *mapping,
 		pgoff_t start, pgoff_t end);
diff --git a/mm/truncate.c b/mm/truncate.c
index 7b4ea4c4a46b..131f2ab2d566 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -540,12 +540,13 @@  unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
 }
 
 /**
- * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
+ * invalidate_mapping_pages - Invalidate range of clean, unlocked cache of one
+ *			      inode
  * @mapping: the address_space which holds the cache to invalidate
  * @start: the offset 'from' which to invalidate
  * @end: the offset 'to' which to invalidate (inclusive)
  *
- * This function removes pages that are clean, unmapped and unlocked,
+ * This function removes range of pages that are clean, unmapped and unlocked,
  * as well as shadow entries. It will not block on IO activity.
  *
  * If you want to remove all the pages of one inode, regardless of
@@ -560,6 +561,21 @@  unsigned long invalidate_mapping_pages(struct address_space *mapping,
 }
 EXPORT_SYMBOL(invalidate_mapping_pages);
 
+/**
+ * invalidate_inode_pages - Invalidate all clean, unlocked cache of one inode
+ * @mapping: the address_space which holds the cache to invalidate
+ *
+ * This function removes all pages that are clean, unmapped and unlocked,
+ * as well as shadow entries. It will not block on IO activity.
+ */
+int invalidate_inode_pages(struct address_space *mapping)
+{
+	invalidate_mapping_pages(mapping, 0, -1);
+
+	return 0;
+}
+EXPORT_SYMBOL(invalidate_inode_pages);
+
 /*
  * This is like invalidate_inode_page(), except it ignores the page's
  * refcount.  We do this because invalidate_inode_pages2() needs stronger