diff mbox series

files: Use rcu lock to get the file structures for better performance

Message ID 20200521123835.70069-1-songmuchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series files: Use rcu lock to get the file structures for better performance | expand

Commit Message

Muchun Song May 21, 2020, 12:38 p.m. UTC
There is another safe way to get the file structure without
holding files->file_lock: the RCU read lock, which also
performs better. So use the RCU read lock instead of
files->file_lock.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 fs/proc/fd.c         | 31 ++++++++++++++++++++++++-------
 kernel/bpf/syscall.c | 17 +++++++++++------
 kernel/kcmp.c        | 15 ++++++++++-----
 3 files changed, 45 insertions(+), 18 deletions(-)

Comments

Matthew Wilcox May 21, 2020, 3:21 p.m. UTC | #1
On Thu, May 21, 2020 at 08:38:35PM +0800, Muchun Song wrote:
> There is another safe way to get the file structure without
> holding the files->file_lock. That is rcu lock, and this way
> has better performance. So use the rcu lock instead of the
> files->file_lock.

What makes you think this is safe?  Are you actually seeing contention
on this spinlock?
Muchun Song May 21, 2020, 4:06 p.m. UTC | #2
On Thu, May 21, 2020 at 11:21 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, May 21, 2020 at 08:38:35PM +0800, Muchun Song wrote:
> > There is another safe way to get the file structure without
> > holding the files->file_lock. That is rcu lock, and this way
> > has better performance. So use the rcu lock instead of the
> > files->file_lock.
>
> What makes you think this is safe?  Are you actually seeing contention
> on this spinlock?
>

I have read Documentation/filesystems/files.txt.
If my understanding is correct, it is safe to use the RCU lock.

Thanks.
Greg KH May 21, 2020, 4:16 p.m. UTC | #3
On Fri, May 22, 2020 at 12:06:46AM +0800, Muchun Song wrote:
> On Thu, May 21, 2020 at 11:21 PM Matthew Wilcox <willy@infradead.org> wrote:
> >
> > On Thu, May 21, 2020 at 08:38:35PM +0800, Muchun Song wrote:
> > > There is another safe way to get the file structure without
> > > holding the files->file_lock. That is rcu lock, and this way
> > > has better performance. So use the rcu lock instead of the
> > > files->file_lock.
> >
> > What makes you think this is safe?  Are you actually seeing contention
> > on this spinlock?
> >
> 
> I have read the doc which is in the Documentation/filesystems/files.txt.
> If my understanding is correct, I think it is safe to use rcu lock.

Did you test this and prove that it is safe and "faster"?  If so, you
always have to show that in your changelog.  Please fix it up and
resend.

thanks,

greg k-h
Matthew Wilcox May 21, 2020, 4:47 p.m. UTC | #4
On Thu, May 21, 2020 at 08:38:35PM +0800, Muchun Song wrote:
> +++ b/fs/proc/fd.c
> @@ -34,19 +34,27 @@ static int seq_show(struct seq_file *m, void *v)
>  	if (files) {
>  		unsigned int fd = proc_fd(m->private);
>  
> -		spin_lock(&files->file_lock);
> +		rcu_read_lock();
> +again:
>  		file = fcheck_files(files, fd);
>  		if (file) {
> -			struct fdtable *fdt = files_fdtable(files);
> +			struct fdtable *fdt;
> +
> +			if (!get_file_rcu(file)) {
> +				/*
> +				 * we loop to catch the new file (or NULL
> +				 * pointer).
> +				 */
> +				goto again;
> +			}
>  
> +			fdt = files_fdtable(files);

This is unusual, and may not be safe.

fcheck_files() loads files->fdt.  Then it loads file from fdt->fd[].
Now you're loading files->fdt again here, and it could have been changed
by another thread expanding the fd table.

You have to write a changelog which convinces me you've thought about
this race and that it's safe.  Because I don't think you even realise
it's a possibility at this point.

> @@ -160,14 +168,23 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
>  		unsigned int fd = proc_fd(d_inode(dentry));
>  		struct file *fd_file;
>  
> -		spin_lock(&files->file_lock);
> +		rcu_read_lock();
> +again:
>  		fd_file = fcheck_files(files, fd);
>  		if (fd_file) {
> +			if (!get_file_rcu(fd_file)) {
> +				/*
> +				 * we loop to catch the new file
> +				 * (or NULL pointer).
> +				 */
> +				goto again;
> +			}
>  			*path = fd_file->f_path;
>  			path_get(&fd_file->f_path);
> +			fput(fd_file);
>  			ret = 0;
>  		}
> -		spin_unlock(&files->file_lock);
> +		rcu_read_unlock();

Why is it an improvement to increment/decrement the refcount on the
struct file here, rather than take/release the spinlock?
Muchun Song May 22, 2020, 7:52 a.m. UTC | #5
On Fri, May 22, 2020 at 12:47 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, May 21, 2020 at 08:38:35PM +0800, Muchun Song wrote:
> > +++ b/fs/proc/fd.c
> > @@ -34,19 +34,27 @@ static int seq_show(struct seq_file *m, void *v)
> >       if (files) {
> >               unsigned int fd = proc_fd(m->private);
> >
> > -             spin_lock(&files->file_lock);
> > +             rcu_read_lock();
> > +again:
> >               file = fcheck_files(files, fd);
> >               if (file) {
> > -                     struct fdtable *fdt = files_fdtable(files);
> > +                     struct fdtable *fdt;
> > +
> > +                     if (!get_file_rcu(file)) {
> > +                             /*
> > +                              * we loop to catch the new file (or NULL
> > +                              * pointer).
> > +                              */
> > +                             goto again;
> > +                     }
> >
> > +                     fdt = files_fdtable(files);
>
> This is unusual, and may not be safe.
>
> fcheck_files() loads files->fdt.  Then it loads file from fdt->fd[].
> Now you're loading files->fdt again here, and it could have been changed
> by another thread expanding the fd table.
>
> You have to write a changelog which convinces me you've thought about
> this race and that it's safe.  Because I don't think you even realise
> it's a possibility at this point.

Thanks for your review. It is a problem, and I can fix it.

>
> > @@ -160,14 +168,23 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
> >               unsigned int fd = proc_fd(d_inode(dentry));
> >               struct file *fd_file;
> >
> > -             spin_lock(&files->file_lock);
> > +             rcu_read_lock();
> > +again:
> >               fd_file = fcheck_files(files, fd);
> >               if (fd_file) {
> > +                     if (!get_file_rcu(fd_file)) {
> > +                             /*
> > +                              * we loop to catch the new file
> > +                              * (or NULL pointer).
> > +                              */
> > +                             goto again;
> > +                     }
> >                       *path = fd_file->f_path;
> >                       path_get(&fd_file->f_path);
> > +                     fput(fd_file);
> >                       ret = 0;
> >               }
> > -             spin_unlock(&files->file_lock);
> > +             rcu_read_unlock();
>
> Why is it an improvement to increment/decrement the refcount on the
> struct file here, rather than take/release the spinlock?
>

lock-free vs spinlock.

Do you think a spinlock would be better than the lock-free method?
Actually, I prefer the RCU lock.
Matthew Wilcox May 22, 2020, 11:43 a.m. UTC | #6
On Fri, May 22, 2020 at 03:52:39PM +0800, Muchun Song wrote:
> On Fri, May 22, 2020 at 12:47 AM Matthew Wilcox <willy@infradead.org> wrote:
> > > @@ -160,14 +168,23 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
> > >               unsigned int fd = proc_fd(d_inode(dentry));
> > >               struct file *fd_file;
> > >
> > > -             spin_lock(&files->file_lock);
> > > +             rcu_read_lock();
> > > +again:
> > >               fd_file = fcheck_files(files, fd);
> > >               if (fd_file) {
> > > +                     if (!get_file_rcu(fd_file)) {
> > > +                             /*
> > > +                              * we loop to catch the new file
> > > +                              * (or NULL pointer).
> > > +                              */
> > > +                             goto again;
> > > +                     }
> > >                       *path = fd_file->f_path;
> > >                       path_get(&fd_file->f_path);
> > > +                     fput(fd_file);
> > >                       ret = 0;
> > >               }
> > > -             spin_unlock(&files->file_lock);
> > > +             rcu_read_unlock();
> >
> > Why is it an improvement to increment/decrement the refcount on the
> > struct file here, rather than take/release the spinlock?
> >
> 
> lock-free vs spinlock.

bananas vs oranges.

How do you think refcounts work?  How do you think spinlocks work?

> Do you think spinlock would be better than the lock-free method?
> Actually I prefer the rcu lock.

Why?  You don't seem to understand the tradeoffs.
diff mbox series

Patch

diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 81882a13212d3..5d5b0f091d32a 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -34,19 +34,27 @@  static int seq_show(struct seq_file *m, void *v)
 	if (files) {
 		unsigned int fd = proc_fd(m->private);
 
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
+again:
 		file = fcheck_files(files, fd);
 		if (file) {
-			struct fdtable *fdt = files_fdtable(files);
+			struct fdtable *fdt;
+
+			if (!get_file_rcu(file)) {
+				/*
+				 * we loop to catch the new file (or NULL
+				 * pointer).
+				 */
+				goto again;
+			}
 
+			fdt = files_fdtable(files);
 			f_flags = file->f_flags;
 			if (close_on_exec(fd, fdt))
 				f_flags |= O_CLOEXEC;
-
-			get_file(file);
 			ret = 0;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 		put_files_struct(files);
 	}
 
@@ -160,14 +168,23 @@  static int proc_fd_link(struct dentry *dentry, struct path *path)
 		unsigned int fd = proc_fd(d_inode(dentry));
 		struct file *fd_file;
 
-		spin_lock(&files->file_lock);
+		rcu_read_lock();
+again:
 		fd_file = fcheck_files(files, fd);
 		if (fd_file) {
+			if (!get_file_rcu(fd_file)) {
+				/*
+				 * we loop to catch the new file
+				 * (or NULL pointer).
+				 */
+				goto again;
+			}
 			*path = fd_file->f_path;
 			path_get(&fd_file->f_path);
+			fput(fd_file);
 			ret = 0;
 		}
-		spin_unlock(&files->file_lock);
+		rcu_read_unlock();
 		put_files_struct(files);
 	}
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8608d6e1b0e0e..441c91378a1fc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3451,14 +3451,19 @@  static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (!files)
 		return -ENOENT;
 
-	err = 0;
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
+again:
 	file = fcheck_files(files, fd);
-	if (!file)
+	if (file) {
+		if (!get_file_rcu(file)) {
+			/* we loop to catch the new file (or NULL pointer) */
+			goto again;
+		}
+		err = 0;
+	} else {
 		err = -EBADF;
-	else
-		get_file(file);
-	spin_unlock(&files->file_lock);
+	}
+	rcu_read_unlock();
 	put_files_struct(files);
 
 	if (err)
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index b3ff9288c6cc9..3b4f2a54186f2 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -120,13 +120,18 @@  static int kcmp_epoll_target(struct task_struct *task1,
 	if (!files)
 		return -EBADF;
 
-	spin_lock(&files->file_lock);
+	rcu_read_lock();
+again:
 	filp_epoll = fcheck_files(files, slot.efd);
-	if (filp_epoll)
-		get_file(filp_epoll);
-	else
+	if (filp_epoll) {
+		if (!get_file_rcu(filp_epoll)) {
+			/* we loop to catch the new file (or NULL pointer) */
+			goto again;
+		}
+	} else {
 		filp_tgt = ERR_PTR(-EBADF);
-	spin_unlock(&files->file_lock);
+	}
+	rcu_read_unlock();
 	put_files_struct(files);
 
 	if (filp_epoll) {