Message ID | 20240703143311.2184454-2-yu.ma@intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fs/file.c: optimize the critical section of file_lock in | expand |
On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote: > alloc_fd() has a sanity check inside to make sure the struct file mapping to the > allocated fd is NULL. Remove this sanity check since it can be assured by > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add > likely/unlikely and expand_file() call avoidance to reduce the work under > file_lock. > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > Signed-off-by: Yu Ma <yu.ma@intel.com> > --- > fs/file.c | 38 ++++++++++++++++---------------------- > 1 file changed, 16 insertions(+), 22 deletions(-) > > diff --git a/fs/file.c b/fs/file.c > index a3b72aa64f11..5178b246e54b 100644 > --- a/fs/file.c > +++ b/fs/file.c > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) > if (fd < files->next_fd) > fd = files->next_fd; > > - if (fd < fdt->max_fds) > + if (likely(fd < fdt->max_fds)) > fd = find_next_fd(fdt, fd); > > + error = -EMFILE; > + if (unlikely(fd >= fdt->max_fds)) { > + error = expand_files(files, fd); > + if (error < 0) > + goto out; > + /* > + * If we needed to expand the fs array we > + * might have blocked - try again. > + */ > + if (error) > + goto repeat; > + } So this ends up removing the expand_files() above the fd >= end check which means that you can end up expanding the files_struct even though the request fd is past the provided end. That seems odd. What's the reason for that reordering? > + > /* > * N.B. For clone tasks sharing a files structure, this test > * will limit the total number of files that can be opened. > */ > - error = -EMFILE; > - if (fd >= end) > - goto out; > - > - error = expand_files(files, fd); > - if (error < 0) > + if (unlikely(fd >= end)) > goto out; > > - /* > - * If we needed to expand the fs array we > - * might have blocked - try again. > - */ > - if (error) > - goto repeat; > - > if (start <= files->next_fd) > files->next_fd = fd + 1; > > @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) > else > __clear_close_on_exec(fd, fdt); > error = fd; > -#if 1 > - /* Sanity check */ > - if (rcu_access_pointer(fdt->fd[fd]) != NULL) { > - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); > - rcu_assign_pointer(fdt->fd[fd], NULL); > - } > -#endif > > out: > spin_unlock(&files->file_lock); > @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file) > rcu_read_unlock_sched(); > spin_lock(&files->file_lock); > fdt = files_fdtable(files); > - BUG_ON(fdt->fd[fd] != NULL); > + WARN_ON(fdt->fd[fd] != NULL); > rcu_assign_pointer(fdt->fd[fd], file); > spin_unlock(&files->file_lock); > return; > -- > 2.43.0 >
On 7/3/2024 10:34 PM, Christian Brauner wrote: > On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote: >> alloc_fd() has a sanity check inside to make sure the struct file mapping to the >> allocated fd is NULL. Remove this sanity check since it can be assured by >> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add >> likely/unlikely and expand_file() call avoidance to reduce the work under >> file_lock. >> >> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> >> Signed-off-by: Yu Ma <yu.ma@intel.com> >> --- >> fs/file.c | 38 ++++++++++++++++---------------------- >> 1 file changed, 16 insertions(+), 22 deletions(-) >> >> diff --git a/fs/file.c b/fs/file.c >> index a3b72aa64f11..5178b246e54b 100644 >> --- a/fs/file.c >> +++ b/fs/file.c >> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) >> if (fd < files->next_fd) >> fd = files->next_fd; >> >> - if (fd < fdt->max_fds) >> + if (likely(fd < fdt->max_fds)) >> fd = find_next_fd(fdt, fd); >> >> + error = -EMFILE; >> + if (unlikely(fd >= fdt->max_fds)) { >> + error = expand_files(files, fd); >> + if (error < 0) >> + goto out; >> + /* >> + * If we needed to expand the fs array we >> + * might have blocked - try again. >> + */ >> + if (error) >> + goto repeat; >> + } > So this ends up removing the expand_files() above the fd >= end check > which means that you can end up expanding the files_struct even though > the request fd is past the provided end. That seems odd. What's the > reason for that reordering? Yes, you are right, thanks Christian. This incorrect reordering here is due to historical versions with fast path inside. I'll update the order back. >> + >> /* >> * N.B. For clone tasks sharing a files structure, this test >> * will limit the total number of files that can be opened. >> */ >> - error = -EMFILE; >> - if (fd >= end) >> - goto out; >> - >> - error = expand_files(files, fd); >> - if (error < 0) >> + if (unlikely(fd >= end)) >> goto out; >> >> - /* >> - * If we needed to expand the fs array we >> - * might have blocked - try again. >> - */ >> - if (error) >> - goto repeat; >> - >> if (start <= files->next_fd) >> files->next_fd = fd + 1; >> >> @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) >> else >> __clear_close_on_exec(fd, fdt); >> error = fd; >> -#if 1 >> - /* Sanity check */ >> - if (rcu_access_pointer(fdt->fd[fd]) != NULL) { >> - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); >> - rcu_assign_pointer(fdt->fd[fd], NULL); >> - } >> -#endif >> >> out: >> spin_unlock(&files->file_lock); >> @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file) >> rcu_read_unlock_sched(); >> spin_lock(&files->file_lock); >> fdt = files_fdtable(files); >> - BUG_ON(fdt->fd[fd] != NULL); >> + WARN_ON(fdt->fd[fd] != NULL); >> rcu_assign_pointer(fdt->fd[fd], file); >> spin_unlock(&files->file_lock); >> return; >> -- >> 2.43.0 >>
On Wed 03-07-24 16:34:49, Christian Brauner wrote: > On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote: > > alloc_fd() has a sanity check inside to make sure the struct file mapping to the > > allocated fd is NULL. Remove this sanity check since it can be assured by > > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add > > likely/unlikely and expand_file() call avoidance to reduce the work under > > file_lock. > > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > > Signed-off-by: Yu Ma <yu.ma@intel.com> > > --- > > fs/file.c | 38 ++++++++++++++++---------------------- > > 1 file changed, 16 insertions(+), 22 deletions(-) > > > > diff --git a/fs/file.c b/fs/file.c > > index a3b72aa64f11..5178b246e54b 100644 > > --- a/fs/file.c > > +++ b/fs/file.c > > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) > > if (fd < files->next_fd) > > fd = files->next_fd; > > > > - if (fd < fdt->max_fds) > > + if (likely(fd < fdt->max_fds)) > > fd = find_next_fd(fdt, fd); > > > > + error = -EMFILE; > > + if (unlikely(fd >= fdt->max_fds)) { > > + error = expand_files(files, fd); > > + if (error < 0) > > + goto out; > > + /* > > + * If we needed to expand the fs array we > > + * might have blocked - try again. > > + */ > > + if (error) > > + goto repeat; > > + } > > So this ends up removing the expand_files() above the fd >= end check > which means that you can end up expanding the files_struct even though > the request fd is past the provided end. That seems odd. What's the > reason for that reordering? Yeah, not only that but also: > > /* > > * N.B. For clone tasks sharing a files structure, this test > > * will limit the total number of files that can be opened. > > */ > > - error = -EMFILE; > > - if (fd >= end) > > - goto out; > > - > > - error = expand_files(files, fd); > > - if (error < 0) > > + if (unlikely(fd >= end)) > > goto out; We could then exit here with error set to 0 instead of -EMFILE. So this needs a bit of work. But otherwise the patch looks good to me. Honza
On 7/4/2024 6:11 PM, Jan Kara wrote: > On Wed 03-07-24 16:34:49, Christian Brauner wrote: >> On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote: >>> alloc_fd() has a sanity check inside to make sure the struct file mapping to the >>> allocated fd is NULL. Remove this sanity check since it can be assured by >>> exisitng zero initilization and NULL set when recycling fd. Meanwhile, add >>> likely/unlikely and expand_file() call avoidance to reduce the work under >>> file_lock. >>> >>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> >>> Signed-off-by: Yu Ma <yu.ma@intel.com> >>> --- >>> fs/file.c | 38 ++++++++++++++++---------------------- >>> 1 file changed, 16 insertions(+), 22 deletions(-) >>> >>> diff --git a/fs/file.c b/fs/file.c >>> index a3b72aa64f11..5178b246e54b 100644 >>> --- a/fs/file.c >>> +++ b/fs/file.c >>> @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) >>> if (fd < files->next_fd) >>> fd = files->next_fd; >>> >>> - if (fd < fdt->max_fds) >>> + if (likely(fd < fdt->max_fds)) >>> fd = find_next_fd(fdt, fd); >>> >>> + error = -EMFILE; >>> + if (unlikely(fd >= fdt->max_fds)) { >>> + error = expand_files(files, fd); >>> + if (error < 0) >>> + goto out; >>> + /* >>> + * If we needed to expand the fs array we >>> + * might have blocked - try again. >>> + */ >>> + if (error) >>> + goto repeat; >>> + } >> So this ends up removing the expand_files() above the fd >= end check >> which means that you can end up expanding the files_struct even though >> the request fd is past the provided end. That seems odd. What's the >> reason for that reordering? > Yeah, not only that but also: > >>> /* >>> * N.B. For clone tasks sharing a files structure, this test >>> * will limit the total number of files that can be opened. >>> */ >>> - error = -EMFILE; >>> - if (fd >= end) >>> - goto out; >>> - >>> - error = expand_files(files, fd); >>> - if (error < 0) >>> + if (unlikely(fd >= end)) >>> goto out; > We could then exit here with error set to 0 instead of -EMFILE. So this > needs a bit of work. But otherwise the patch looks good to me. > > Honza Do you mean that we return 0 here is fd >=end, I'm afraid that might broke the original design of this function. And all the callers of it are using ret < 0 for error handling, if ret=0, that should mean the fd allocated is 0 ...
On Thu 04-07-24 22:45:32, Ma, Yu wrote: > > On 7/4/2024 6:11 PM, Jan Kara wrote: > > On Wed 03-07-24 16:34:49, Christian Brauner wrote: > > > On Wed, Jul 03, 2024 at 10:33:09AM GMT, Yu Ma wrote: > > > > alloc_fd() has a sanity check inside to make sure the struct file mapping to the > > > > allocated fd is NULL. Remove this sanity check since it can be assured by > > > > exisitng zero initilization and NULL set when recycling fd. Meanwhile, add > > > > likely/unlikely and expand_file() call avoidance to reduce the work under > > > > file_lock. > > > > > > > > Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> > > > > Signed-off-by: Yu Ma <yu.ma@intel.com> > > > > --- > > > > fs/file.c | 38 ++++++++++++++++---------------------- > > > > 1 file changed, 16 insertions(+), 22 deletions(-) > > > > > > > > diff --git a/fs/file.c b/fs/file.c > > > > index a3b72aa64f11..5178b246e54b 100644 > > > > --- a/fs/file.c > > > > +++ b/fs/file.c > > > > @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) > > > > if (fd < files->next_fd) > > > > fd = files->next_fd; > > > > - if (fd < fdt->max_fds) > > > > + if (likely(fd < fdt->max_fds)) > > > > fd = find_next_fd(fdt, fd); > > > > + error = -EMFILE; > > > > + if (unlikely(fd >= fdt->max_fds)) { > > > > + error = expand_files(files, fd); > > > > + if (error < 0) > > > > + goto out; > > > > + /* > > > > + * If we needed to expand the fs array we > > > > + * might have blocked - try again. > > > > + */ > > > > + if (error) > > > > + goto repeat; > > > > + } > > > So this ends up removing the expand_files() above the fd >= end check > > > which means that you can end up expanding the files_struct even though > > > the request fd is past the provided end. That seems odd. What's the > > > reason for that reordering? > > Yeah, not only that but also: > > > > > > /* > > > > * N.B. For clone tasks sharing a files structure, this test > > > > * will limit the total number of files that can be opened. > > > > */ > > > > - error = -EMFILE; > > > > - if (fd >= end) > > > > - goto out; > > > > - > > > > - error = expand_files(files, fd); > > > > - if (error < 0) > > > > + if (unlikely(fd >= end)) > > > > goto out; > > We could then exit here with error set to 0 instead of -EMFILE. So this > > needs a bit of work. But otherwise the patch looks good to me. > > > > Honza > > Do you mean that we return 0 here is fd >=end, I'm afraid that might broke > the original design of this function. And all the callers of it are using > ret < 0 for error handling, if ret=0, that should mean the fd allocated is 0 > ... What I meant is that after your changes alloc_fd() could return 0 in fd >= end case. It could happen if we went through expand_files() which then returned 0. Anyway, please just fix the ordering of checks and we should be fine. Honza
diff --git a/fs/file.c b/fs/file.c index a3b72aa64f11..5178b246e54b 100644 --- a/fs/file.c +++ b/fs/file.c @@ -515,28 +515,29 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) if (fd < files->next_fd) fd = files->next_fd; - if (fd < fdt->max_fds) + if (likely(fd < fdt->max_fds)) fd = find_next_fd(fdt, fd); + error = -EMFILE; + if (unlikely(fd >= fdt->max_fds)) { + error = expand_files(files, fd); + if (error < 0) + goto out; + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + } + /* * N.B. For clone tasks sharing a files structure, this test * will limit the total number of files that can be opened. */ - error = -EMFILE; - if (fd >= end) - goto out; - - error = expand_files(files, fd); - if (error < 0) + if (unlikely(fd >= end)) goto out; - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; - if (start <= files->next_fd) files->next_fd = fd + 1; @@ -546,13 +547,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) else __clear_close_on_exec(fd, fdt); error = fd; -#if 1 - /* Sanity check */ - if (rcu_access_pointer(fdt->fd[fd]) != NULL) { - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); - rcu_assign_pointer(fdt->fd[fd], NULL); - } -#endif out: spin_unlock(&files->file_lock); @@ -618,7 +612,7 @@ void fd_install(unsigned int fd, struct file *file) rcu_read_unlock_sched(); spin_lock(&files->file_lock); fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); + WARN_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); return;