diff mbox series

[RFC] fuse: add new function to invalidate cache for all inodes

Message ID 20250115163253.8402-1-luis@igalia.com (mailing list archive)
State New
Headers show
Series [RFC] fuse: add new function to invalidate cache for all inodes | expand

Commit Message

Luis Henriques Jan. 15, 2025, 4:32 p.m. UTC
Currently userspace is able to notify the kernel to invalidate the cache
for an inode.  This means that, if all the inodes in a filesystem need to
be invalidated, then userspace needs to iterate through all of them and do
this kernel notification separately.

This patch adds a new option that allows userspace to invalidate all the
inodes with a single notification operation.  In addition to invalidating
all the inodes, it also shrinks the superblock dcache.

Signed-off-by: Luis Henriques <luis@igalia.com>
---
Just an additional note that this patch could eventually be simplified if
Dave Chinner's patch to iterate through the superblock inodes[1] is merged.

[1] https://lore.kernel.org/r/20241002014017.3801899-3-david@fromorbit.com

 fs/fuse/inode.c           | 53 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fuse.h |  3 +++
 2 files changed, 56 insertions(+)

Comments

Bernd Schubert Jan. 15, 2025, 4:43 p.m. UTC | #1
On 1/15/25 17:32, Luis Henriques wrote:
> Currently userspace is able to notify the kernel to invalidate the cache
> for an inode.  This means that, if all the inodes in a filesystem need to
> be invalidated, then userspace needs to iterate through all of them and do
> this kernel notification separately.
> 
> This patch adds a new option that allows userspace to invalidate all the
> inodes with a single notification operation.  In addition to invalidate all
> the inodes, it also shrinks the superblock dcache.

Out of interest, what is the use case?

> 
> Signed-off-by: Luis Henriques <luis@igalia.com>
> ---
> Just an additional note that this patch could eventually be simplified if
> Dave Chinner patch to iterate through the superblock inodes[1] is merged.
> 
> [1] https://lore.kernel.org/r/20241002014017.3801899-3-david@fromorbit.com
> 
>  fs/fuse/inode.c           | 53 +++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/fuse.h |  3 +++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 3ce4f4e81d09..1fd9a5f303da 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -546,6 +546,56 @@ struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
>  	return NULL;
>  }
>  
> +static int fuse_reverse_inval_all(struct fuse_conn *fc)
> +{
> +	struct fuse_mount *fm;
> +	struct super_block *sb;
> +	struct inode *inode, *old_inode = NULL;
> +	struct fuse_inode *fi;
> +
> +	inode = fuse_ilookup(fc, FUSE_ROOT_ID, NULL);
> +	if (!inode)
> +		return -ENOENT;
> +
> +	fm = get_fuse_mount(inode);
> +	iput(inode);
> +	if (!fm)
> +		return -ENOENT;
> +	sb = fm->sb;
> +
> +	spin_lock(&sb->s_inode_list_lock);
> +	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {

Maybe list_for_each_entry_safe() and then you can iput(inode) before the
next iteration?

> +		spin_lock(&inode->i_lock);
> +		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
> +		    !atomic_read(&inode->i_count)) {
> +			spin_unlock(&inode->i_lock);
> +			continue;
> +		}
> +
> +		__iget(inode);
> +		spin_unlock(&inode->i_lock);
> +		spin_unlock(&sb->s_inode_list_lock);
> +		iput(old_inode);
> +
> +		fi = get_fuse_inode(inode);
> +		spin_lock(&fi->lock);
> +		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
> +		spin_unlock(&fi->lock);
> +		fuse_invalidate_attr(inode);
> +		forget_all_cached_acls(inode);
> +
> +		old_inode = inode;
> +		cond_resched();
> +		spin_lock(&sb->s_inode_list_lock);
> +	}
> +	spin_unlock(&sb->s_inode_list_lock);
> +	iput(old_inode);


Thanks,
Bernd
Luis Henriques Jan. 15, 2025, 6:07 p.m. UTC | #2
Hi Bernd,

On Wed, Jan 15 2025, Bernd Schubert wrote:

> On 1/15/25 17:32, Luis Henriques wrote:
>> Currently userspace is able to notify the kernel to invalidate the cache
>> for an inode.  This means that, if all the inodes in a filesystem need to
>> be invalidated, then userspace needs to iterate through all of them and do
>> this kernel notification separately.
>> 
>> This patch adds a new option that allows userspace to invalidate all the
>> inodes with a single notification operation.  In addition to invalidate all
>> the inodes, it also shrinks the superblock dcache.
>
> Out of interest, what is the use case?

This is for a read-only filesystem.  However, the filesystem objects
(files, directories, ...) may change dramatically in an atomic way, so
that a totally different set of objects replaces the old one.

Obviously, this patch would help with the process of getting rid of the
old generation of the filesystem.

>> 
>> Signed-off-by: Luis Henriques <luis@igalia.com>
>> ---
>> Just an additional note that this patch could eventually be simplified if
>> Dave Chinner patch to iterate through the superblock inodes[1] is merged.
>> 
>> [1] https://lore.kernel.org/r/20241002014017.3801899-3-david@fromorbit.com
>> 
>>  fs/fuse/inode.c           | 53 +++++++++++++++++++++++++++++++++++++++
>>  include/uapi/linux/fuse.h |  3 +++
>>  2 files changed, 56 insertions(+)
>> 
>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>> index 3ce4f4e81d09..1fd9a5f303da 100644
>> --- a/fs/fuse/inode.c
>> +++ b/fs/fuse/inode.c
>> @@ -546,6 +546,56 @@ struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
>>  	return NULL;
>>  }
>>  
>> +static int fuse_reverse_inval_all(struct fuse_conn *fc)
>> +{
>> +	struct fuse_mount *fm;
>> +	struct super_block *sb;
>> +	struct inode *inode, *old_inode = NULL;
>> +	struct fuse_inode *fi;
>> +
>> +	inode = fuse_ilookup(fc, FUSE_ROOT_ID, NULL);
>> +	if (!inode)
>> +		return -ENOENT;
>> +
>> +	fm = get_fuse_mount(inode);
>> +	iput(inode);
>> +	if (!fm)
>> +		return -ENOENT;
>> +	sb = fm->sb;
>> +
>> +	spin_lock(&sb->s_inode_list_lock);
>> +	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
>
> Maybe list_for_each_entry_safe() and then you can iput(inode) before the
> next iteration?

I can rework this loop, but are you sure it's safe to use that?  (Genuine
question!)

I could only find two places where list_for_each_entry_safe() is being
used to walk through the sb inodes.  And they both use an auxiliary list
that holds the inodes to be processed later.  All other places use the
pattern I'm following here.

Or did I misunderstand your suggestion?

Cheers,
Bernd Schubert Jan. 15, 2025, 6:17 p.m. UTC | #3
On 1/15/25 19:07, Luis Henriques wrote:
> Hi Bernd,
> 
> On Wed, Jan 15 2025, Bernd Schubert wrote:
> 
>> On 1/15/25 17:32, Luis Henriques wrote:
>>> Currently userspace is able to notify the kernel to invalidate the cache
>>> for an inode.  This means that, if all the inodes in a filesystem need to
>>> be invalidated, then userspace needs to iterate through all of them and do
>>> this kernel notification separately.
>>>
>>> This patch adds a new option that allows userspace to invalidate all the
>>> inodes with a single notification operation.  In addition to invalidate all
>>> the inodes, it also shrinks the superblock dcache.
>>
>> Out of interest, what is the use case?
> 
> This is for a read-only filesystem.  However, the filesystem objects
> (files, directories, ...) may change dramatically in an atomic way, so
> that a totally different set of objects replaces the old one.
> 
> Obviously, this patch would help with the process of getting rid of the
> old generation of the filesystem.
> 
>>>
>>> Signed-off-by: Luis Henriques <luis@igalia.com>
>>> ---
>>> Just an additional note that this patch could eventually be simplified if
>>> Dave Chinner patch to iterate through the superblock inodes[1] is merged.
>>>
>>> [1] https://lore.kernel.org/r/20241002014017.3801899-3-david@fromorbit.com
>>>
>>>  fs/fuse/inode.c           | 53 +++++++++++++++++++++++++++++++++++++++
>>>  include/uapi/linux/fuse.h |  3 +++
>>>  2 files changed, 56 insertions(+)
>>>
>>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>>> index 3ce4f4e81d09..1fd9a5f303da 100644
>>> --- a/fs/fuse/inode.c
>>> +++ b/fs/fuse/inode.c
>>> @@ -546,6 +546,56 @@ struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
>>>  	return NULL;
>>>  }
>>>  
>>> +static int fuse_reverse_inval_all(struct fuse_conn *fc)
>>> +{
>>> +	struct fuse_mount *fm;
>>> +	struct super_block *sb;
>>> +	struct inode *inode, *old_inode = NULL;
>>> +	struct fuse_inode *fi;
>>> +
>>> +	inode = fuse_ilookup(fc, FUSE_ROOT_ID, NULL);
>>> +	if (!inode)
>>> +		return -ENOENT;
>>> +
>>> +	fm = get_fuse_mount(inode);
>>> +	iput(inode);
>>> +	if (!fm)
>>> +		return -ENOENT;
>>> +	sb = fm->sb;
>>> +
>>> +	spin_lock(&sb->s_inode_list_lock);
>>> +	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
>>
>> Maybe list_for_each_entry_safe() and then you can iput(inode) before the
>> next iteration?
> 
> I can rework this loop, but are you sure it's safe to use that?  (Genuine
> question!)
> 
> I could only find two places where list_for_each_entry_safe() is being
> used to walk through the sb inodes.  And they both use an auxiliary list
> that holds the inodes to be processed later.  All other places use the
> pattern I'm following here.
> 
> Or did I misunderstood your suggestion?


Actually, my mistake: you cannot use list_for_each_entry_safe() here
because you give up the list lock, and the next entry, which _safe has
already fetched, might no longer be valid.

Sorry for the noise!


Thanks,
Bernd
diff mbox series

Patch

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3ce4f4e81d09..1fd9a5f303da 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -546,6 +546,56 @@  struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
 	return NULL;
 }
 
+static int fuse_reverse_inval_all(struct fuse_conn *fc)
+{
+	struct fuse_mount *fm;
+	struct super_block *sb;
+	struct inode *inode, *old_inode = NULL;
+	struct fuse_inode *fi;
+
+	inode = fuse_ilookup(fc, FUSE_ROOT_ID, NULL);
+	if (!inode)
+		return -ENOENT;
+
+	fm = get_fuse_mount(inode);
+	iput(inode);
+	if (!fm)
+		return -ENOENT;
+	sb = fm->sb;
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    !atomic_read(&inode->i_count)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+
+		__iget(inode);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&sb->s_inode_list_lock);
+		iput(old_inode);
+
+		fi = get_fuse_inode(inode);
+		spin_lock(&fi->lock);
+		fi->attr_version = atomic64_inc_return(&fm->fc->attr_version);
+		spin_unlock(&fi->lock);
+		fuse_invalidate_attr(inode);
+		forget_all_cached_acls(inode);
+
+		old_inode = inode;
+		cond_resched();
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+	iput(old_inode);
+
+	shrink_dcache_sb(sb);
+
+	return 0;
+}
+
 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
 			     loff_t offset, loff_t len)
 {
@@ -554,6 +604,9 @@  int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
 	pgoff_t pg_start;
 	pgoff_t pg_end;
 
+	if (nodeid == FUSE_INVAL_ALL_INODES)
+		return fuse_reverse_inval_all(fc);
+
 	inode = fuse_ilookup(fc, nodeid, NULL);
 	if (!inode)
 		return -ENOENT;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index f1e99458e29e..e9e78292d107 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -658,6 +658,9 @@  enum fuse_notify_code {
 	FUSE_NOTIFY_CODE_MAX,
 };
 
+/* The nodeid to request to invalidate all inodes */
+#define FUSE_INVAL_ALL_INODES 0
+
 /* The read buffer is required to be at least 8k, but may be much larger */
 #define FUSE_MIN_READ_BUFFER 8192