diff mbox series

[4/7] kernfs: Skip kernfs_drain_open_files() more aggressively

Message ID 20220820000550.367085-5-tj@kernel.org (mailing list archive)
State New, archived
Headers show
Series [1/7] kernfs: Simply by replacing kernfs_deref_open_node() with of_on() | expand

Commit Message

Tejun Heo Aug. 20, 2022, 12:05 a.m. UTC
Track the number of mmapped files and files that need to be released and
skip kernfs_drain_open_file() if both are zero, which are the precise
conditions which require draining open_files. The early exit test is
factored into kernfs_should_drain_open_files() which is now tested by
kernfs_drain_open_files()'s caller - kernfs_drain().

This isn't a meaningful optimization on its own but will enable future
stand-alone kernfs_deactivate() implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/kernfs/dir.c             |  3 ++-
 fs/kernfs/file.c            | 51 +++++++++++++++++++++++++------------
 fs/kernfs/kernfs-internal.h |  1 +
 3 files changed, 38 insertions(+), 17 deletions(-)

Comments

Chengming Zhou Aug. 23, 2022, 5:27 a.m. UTC | #1
On 2022/8/20 08:05, Tejun Heo wrote:
> Track the number of mmapped files and files that need to be released and
> skip kernfs_drain_open_file() if both are zero, which are the precise
> conditions which require draining open_files. The early exit test is
> factored into kernfs_should_drain_open_files() which is now tested by
> kernfs_drain_open_files()'s caller - kernfs_drain().
> 
> This isn't a meaningful optimization on its own but will enable future
> stand-alone kernfs_deactivate() implementation.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
>  fs/kernfs/dir.c             |  3 ++-
>  fs/kernfs/file.c            | 51 +++++++++++++++++++++++++------------
>  fs/kernfs/kernfs-internal.h |  1 +
>  3 files changed, 38 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 1cc88ba6de90..8ae44db920d4 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -489,7 +489,8 @@ static void kernfs_drain(struct kernfs_node *kn)
>  		rwsem_release(&kn->dep_map, _RET_IP_);
>  	}
>  
> -	kernfs_drain_open_files(kn);
> +	if (kernfs_should_drain_open_files(kn))
> +		kernfs_drain_open_files(kn);
>  
>  	down_write(&root->kernfs_rwsem);
>  }
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 7060a2a714b8..b510589af427 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -23,6 +23,8 @@ struct kernfs_open_node {
>  	atomic_t		event;
>  	wait_queue_head_t	poll;
>  	struct list_head	files; /* goes through kernfs_open_file.list */
> +	unsigned int		nr_mmapped;
> +	unsigned int		nr_to_release;
>  };
>  
>  /*
> @@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
>  
>  	rc = 0;
>  	of->mmapped = true;
> +	of_on(of)->nr_mmapped++;
>  	of->vm_ops = vma->vm_ops;
>  	vma->vm_ops = &kernfs_vm_ops;
>  out_put:
> @@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
>  	}
>  
>  	list_add_tail(&of->list, &on->files);
> +	if (kn->flags & KERNFS_HAS_RELEASE)
> +		on->nr_to_release++;
>  
>  	mutex_unlock(mutex);
>  	return 0;
> @@ -606,8 +611,12 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
>  		return;
>  	}
>  
> -	if (of)
> +	if (of) {
> +		WARN_ON_ONCE((kn->flags & KERNFS_HAS_RELEASE) && !of->released);

kernfs_unlink_open_file() is also used in error case "err_put_node" in kernfs_fop_open(),
which should also dec the on->nr_to_release?

Thanks.

> +		if (of->mmapped)
> +			on->nr_mmapped--;
>  		list_del(&of->list);
> +	}
>  
>  	if (list_empty(&on->files)) {
>  		rcu_assign_pointer(kn->attr.open, NULL);
> @@ -766,6 +775,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
>  		 */
>  		kn->attr.ops->release(of);
>  		of->released = true;
> +		of_on(of)->nr_to_release--;
>  	}
>  }
>  
> @@ -790,25 +800,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
>  	return 0;
>  }
>  
> -void kernfs_drain_open_files(struct kernfs_node *kn)
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn)
>  {
>  	struct kernfs_open_node *on;
> -	struct kernfs_open_file *of;
> -	struct mutex *mutex;
> -
> -	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
> -		return;
> +	bool ret;
>  
>  	/*
> -	 * lockless opportunistic check is safe below because no one is adding to
> -	 * ->attr.open at this point of time. This check allows early bail out
> -	 * if ->attr.open is already NULL. kernfs_unlink_open_file makes
> -	 * ->attr.open NULL only while holding kernfs_open_file_mutex so below
> -	 * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
> -	 * ->attr.open became NULL while waiting for the mutex.
> +	 * @kn being deactivated guarantees that @kn->attr.open can't change
> +	 * beneath us making the lockless test below safe.
>  	 */
> -	if (!rcu_access_pointer(kn->attr.open))
> -		return;
> +	WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
> +
> +	rcu_read_lock();
> +	on = rcu_dereference(kn->attr.open);
> +	ret = on && (on->nr_mmapped || on->nr_to_release);
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
> +void kernfs_drain_open_files(struct kernfs_node *kn)
> +{
> +	struct kernfs_open_node *on;
> +	struct kernfs_open_file *of;
> +	struct mutex *mutex;
>  
>  	mutex = kernfs_open_file_mutex_lock(kn);
>  	on = kernfs_deref_open_node_locked(kn);
> @@ -820,13 +835,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
>  	list_for_each_entry(of, &on->files, list) {
>  		struct inode *inode = file_inode(of->file);
>  
> -		if (kn->flags & KERNFS_HAS_MMAP)
> +		if (of->mmapped) {
>  			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> +			of->mmapped = false;
> +			on->nr_mmapped--;
> +		}
>  
>  		if (kn->flags & KERNFS_HAS_RELEASE)
>  			kernfs_release_file(kn, of);
>  	}
>  
> +	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
>  	mutex_unlock(mutex);
>  }
>  
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 3ae214d02d44..fc5821effd97 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
>   */
>  extern const struct file_operations kernfs_file_fops;
>  
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn);
>  void kernfs_drain_open_files(struct kernfs_node *kn);
>  
>  /*
Tejun Heo Aug. 23, 2022, 7:37 p.m. UTC | #2
On Tue, Aug 23, 2022 at 01:27:22PM +0800, Chengming Zhou wrote:
> > +	if (of) {
> > +		WARN_ON_ONCE((kn->flags & KERNFS_HAS_RELEASE) && !of->released);
> 
> kernfs_unlink_open_file() is also used in error case "err_put_node" in kernfs_fop_open(),
> which should also dec the on->nr_to_release?

Ah, right you're. Will fix. Thanks for pointing it out.
Chengming Zhou Aug. 25, 2022, 12:11 p.m. UTC | #3
On 2022/8/20 08:05, Tejun Heo wrote:
> Track the number of mmapped files and files that need to be released and
> skip kernfs_drain_open_file() if both are zero, which are the precise
> conditions which require draining open_files. The early exit test is
> factored into kernfs_should_drain_open_files() which is now tested by
> kernfs_drain_open_files()'s caller - kernfs_drain().
> 
> This isn't a meaningful optimization on its own but will enable future
> stand-alone kernfs_deactivate() implementation.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> ---
>  fs/kernfs/dir.c             |  3 ++-
>  fs/kernfs/file.c            | 51 +++++++++++++++++++++++++------------
>  fs/kernfs/kernfs-internal.h |  1 +
>  3 files changed, 38 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 1cc88ba6de90..8ae44db920d4 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -489,7 +489,8 @@ static void kernfs_drain(struct kernfs_node *kn)
>  		rwsem_release(&kn->dep_map, _RET_IP_);
>  	}
>  
> -	kernfs_drain_open_files(kn);
> +	if (kernfs_should_drain_open_files(kn))
> +		kernfs_drain_open_files(kn);
>  
>  	down_write(&root->kernfs_rwsem);
>  }
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 7060a2a714b8..b510589af427 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -23,6 +23,8 @@ struct kernfs_open_node {
>  	atomic_t		event;
>  	wait_queue_head_t	poll;
>  	struct list_head	files; /* goes through kernfs_open_file.list */
> +	unsigned int		nr_mmapped;
> +	unsigned int		nr_to_release;
>  };
>  
>  /*
> @@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
>  
>  	rc = 0;
>  	of->mmapped = true;
> +	of_on(of)->nr_mmapped++;
>  	of->vm_ops = vma->vm_ops;
>  	vma->vm_ops = &kernfs_vm_ops;
>  out_put:
> @@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
>  	}

The current code use kmalloc() to alloc kernfs_open_node, leave nr_to_release and
nr_mmapped uninitialized.

Found by below stress test:

```
cd /sys/fs/cgroup

while true; do echo 0 > cgroup.pressure; echo 1 > cgroup.pressure; done

while true; do cat *.pressure; done

```

Thanks.

>  
>  	list_add_tail(&of->list, &on->files);
> +	if (kn->flags & KERNFS_HAS_RELEASE)
> +		on->nr_to_release++;
>  
>  	mutex_unlock(mutex);
>  	return 0;
> @@ -606,8 +611,12 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
>  		return;
>  	}
>  
> -	if (of)
> +	if (of) {
> +		WARN_ON_ONCE((kn->flags & KERNFS_HAS_RELEASE) && !of->released);
> +		if (of->mmapped)
> +			on->nr_mmapped--;
>  		list_del(&of->list);
> +	}
>  
>  	if (list_empty(&on->files)) {
>  		rcu_assign_pointer(kn->attr.open, NULL);
> @@ -766,6 +775,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
>  		 */
>  		kn->attr.ops->release(of);
>  		of->released = true;
> +		of_on(of)->nr_to_release--;
>  	}
>  }
>  
> @@ -790,25 +800,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
>  	return 0;
>  }
>  
> -void kernfs_drain_open_files(struct kernfs_node *kn)
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn)
>  {
>  	struct kernfs_open_node *on;
> -	struct kernfs_open_file *of;
> -	struct mutex *mutex;
> -
> -	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
> -		return;
> +	bool ret;
>  
>  	/*
> -	 * lockless opportunistic check is safe below because no one is adding to
> -	 * ->attr.open at this point of time. This check allows early bail out
> -	 * if ->attr.open is already NULL. kernfs_unlink_open_file makes
> -	 * ->attr.open NULL only while holding kernfs_open_file_mutex so below
> -	 * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
> -	 * ->attr.open became NULL while waiting for the mutex.
> +	 * @kn being deactivated guarantees that @kn->attr.open can't change
> +	 * beneath us making the lockless test below safe.
>  	 */
> -	if (!rcu_access_pointer(kn->attr.open))
> -		return;
> +	WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
> +
> +	rcu_read_lock();
> +	on = rcu_dereference(kn->attr.open);
> +	ret = on && (on->nr_mmapped || on->nr_to_release);
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
> +void kernfs_drain_open_files(struct kernfs_node *kn)
> +{
> +	struct kernfs_open_node *on;
> +	struct kernfs_open_file *of;
> +	struct mutex *mutex;
>  
>  	mutex = kernfs_open_file_mutex_lock(kn);
>  	on = kernfs_deref_open_node_locked(kn);
> @@ -820,13 +835,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
>  	list_for_each_entry(of, &on->files, list) {
>  		struct inode *inode = file_inode(of->file);
>  
> -		if (kn->flags & KERNFS_HAS_MMAP)
> +		if (of->mmapped) {
>  			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> +			of->mmapped = false;
> +			on->nr_mmapped--;
> +		}
>  
>  		if (kn->flags & KERNFS_HAS_RELEASE)
>  			kernfs_release_file(kn, of);
>  	}
>  
> +	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
>  	mutex_unlock(mutex);
>  }
>  
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 3ae214d02d44..fc5821effd97 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
>   */
>  extern const struct file_operations kernfs_file_fops;
>  
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn);
>  void kernfs_drain_open_files(struct kernfs_node *kn);
>  
>  /*
diff mbox series

Patch

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 1cc88ba6de90..8ae44db920d4 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -489,7 +489,8 @@  static void kernfs_drain(struct kernfs_node *kn)
 		rwsem_release(&kn->dep_map, _RET_IP_);
 	}
 
-	kernfs_drain_open_files(kn);
+	if (kernfs_should_drain_open_files(kn))
+		kernfs_drain_open_files(kn);
 
 	down_write(&root->kernfs_rwsem);
 }
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 7060a2a714b8..b510589af427 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -23,6 +23,8 @@  struct kernfs_open_node {
 	atomic_t		event;
 	wait_queue_head_t	poll;
 	struct list_head	files; /* goes through kernfs_open_file.list */
+	unsigned int		nr_mmapped;
+	unsigned int		nr_to_release;
 };
 
 /*
@@ -527,6 +529,7 @@  static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
 
 	rc = 0;
 	of->mmapped = true;
+	of_on(of)->nr_mmapped++;
 	of->vm_ops = vma->vm_ops;
 	vma->vm_ops = &kernfs_vm_ops;
 out_put:
@@ -574,6 +577,8 @@  static int kernfs_get_open_node(struct kernfs_node *kn,
 	}
 
 	list_add_tail(&of->list, &on->files);
+	if (kn->flags & KERNFS_HAS_RELEASE)
+		on->nr_to_release++;
 
 	mutex_unlock(mutex);
 	return 0;
@@ -606,8 +611,12 @@  static void kernfs_unlink_open_file(struct kernfs_node *kn,
 		return;
 	}
 
-	if (of)
+	if (of) {
+		WARN_ON_ONCE((kn->flags & KERNFS_HAS_RELEASE) && !of->released);
+		if (of->mmapped)
+			on->nr_mmapped--;
 		list_del(&of->list);
+	}
 
 	if (list_empty(&on->files)) {
 		rcu_assign_pointer(kn->attr.open, NULL);
@@ -766,6 +775,7 @@  static void kernfs_release_file(struct kernfs_node *kn,
 		 */
 		kn->attr.ops->release(of);
 		of->released = true;
+		of_on(of)->nr_to_release--;
 	}
 }
 
@@ -790,25 +800,30 @@  static int kernfs_fop_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-void kernfs_drain_open_files(struct kernfs_node *kn)
+bool kernfs_should_drain_open_files(struct kernfs_node *kn)
 {
 	struct kernfs_open_node *on;
-	struct kernfs_open_file *of;
-	struct mutex *mutex;
-
-	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
-		return;
+	bool ret;
 
 	/*
-	 * lockless opportunistic check is safe below because no one is adding to
-	 * ->attr.open at this point of time. This check allows early bail out
-	 * if ->attr.open is already NULL. kernfs_unlink_open_file makes
-	 * ->attr.open NULL only while holding kernfs_open_file_mutex so below
-	 * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
-	 * ->attr.open became NULL while waiting for the mutex.
+	 * @kn being deactivated guarantees that @kn->attr.open can't change
+	 * beneath us making the lockless test below safe.
 	 */
-	if (!rcu_access_pointer(kn->attr.open))
-		return;
+	WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+	rcu_read_lock();
+	on = rcu_dereference(kn->attr.open);
+	ret = on && (on->nr_mmapped || on->nr_to_release);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+void kernfs_drain_open_files(struct kernfs_node *kn)
+{
+	struct kernfs_open_node *on;
+	struct kernfs_open_file *of;
+	struct mutex *mutex;
 
 	mutex = kernfs_open_file_mutex_lock(kn);
 	on = kernfs_deref_open_node_locked(kn);
@@ -820,13 +835,17 @@  void kernfs_drain_open_files(struct kernfs_node *kn)
 	list_for_each_entry(of, &on->files, list) {
 		struct inode *inode = file_inode(of->file);
 
-		if (kn->flags & KERNFS_HAS_MMAP)
+		if (of->mmapped) {
 			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+			of->mmapped = false;
+			on->nr_mmapped--;
+		}
 
 		if (kn->flags & KERNFS_HAS_RELEASE)
 			kernfs_release_file(kn, of);
 	}
 
+	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
 	mutex_unlock(mutex);
 }
 
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3ae214d02d44..fc5821effd97 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -157,6 +157,7 @@  struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
  */
 extern const struct file_operations kernfs_file_fops;
 
+bool kernfs_should_drain_open_files(struct kernfs_node *kn);
 void kernfs_drain_open_files(struct kernfs_node *kn);
 
 /*