diff mbox series

[1/2] fs: make do_mkdirat() take struct filename

Message ID 20201116044529.1028783-2-dkadashev@gmail.com (mailing list archive)
State New, archived
Headers show
Series io_uring: add mkdirat support | expand

Commit Message

Dmitry Kadashev Nov. 16, 2020, 4:45 a.m. UTC
Pass in the struct filename pointer instead of the user string, and
update the two callers (the mkdir and mkdirat syscalls) to do the same.
This is heavily based on commit dbea8d345177 ("fs: make do_renameat2()
take struct filename").

This behaves like do_unlinkat() and do_renameat2().

Signed-off-by: Dmitry Kadashev <dkadashev@gmail.com>
---
 fs/internal.h |  1 +
 fs/namei.c    | 20 ++++++++++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

Comments

Jens Axboe Jan. 25, 2021, 4:38 a.m. UTC | #1
On 11/15/20 9:45 PM, Dmitry Kadashev wrote:
> Pass in the struct filename pointers instead of the user string, and
> update the three callers to do the same. This is heavily based on
> commit dbea8d345177 ("fs: make do_renameat2() take struct filename").
> 
> This behaves like do_unlinkat() and do_renameat2().

Al, are you OK with this patch? Leaving it quoted, though you should
have the original too.

> 
> Signed-off-by: Dmitry Kadashev <dkadashev@gmail.com>
> ---
>  fs/internal.h |  1 +
>  fs/namei.c    | 20 ++++++++++++++------
>  2 files changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index 6fd14ea213c3..23b8b427dbd2 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -80,6 +80,7 @@ long do_unlinkat(int dfd, struct filename *name);
>  int may_linkat(struct path *link);
>  int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
>  		 struct filename *newname, unsigned int flags);
> +long do_mkdirat(int dfd, struct filename *name, umode_t mode);
>  
>  /*
>   * namespace.c
> diff --git a/fs/namei.c b/fs/namei.c
> index 03d0e11e4f36..9d26a51f3f54 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3654,17 +3654,23 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
>  }
>  EXPORT_SYMBOL(vfs_mkdir);
>  
> -static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
> +long do_mkdirat(int dfd, struct filename *name, umode_t mode)
>  {
>  	struct dentry *dentry;
>  	struct path path;
>  	int error;
>  	unsigned int lookup_flags = LOOKUP_DIRECTORY;
>  
> +	if (IS_ERR(name))
> +		return PTR_ERR(name);
> +
>  retry:
> -	dentry = user_path_create(dfd, pathname, &path, lookup_flags);
> -	if (IS_ERR(dentry))
> -		return PTR_ERR(dentry);
> +	name->refcnt++; /* filename_create() drops our ref */
> +	dentry = filename_create(dfd, name, &path, lookup_flags);
> +	if (IS_ERR(dentry)) {
> +		error = PTR_ERR(dentry);
> +		goto out;
> +	}
>  
>  	if (!IS_POSIXACL(path.dentry->d_inode))
>  		mode &= ~current_umask();
> @@ -3676,17 +3682,19 @@ static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
>  		lookup_flags |= LOOKUP_REVAL;
>  		goto retry;
>  	}
> +out:
> +	putname(name);
>  	return error;
>  }
>  
>  SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
>  {
> -	return do_mkdirat(dfd, pathname, mode);
> +	return do_mkdirat(dfd, getname(pathname), mode);
>  }
>  
>  SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
>  {
> -	return do_mkdirat(AT_FDCWD, pathname, mode);
> +	return do_mkdirat(AT_FDCWD, getname(pathname), mode);
>  }
>  
>  int vfs_rmdir(struct inode *dir, struct dentry *dentry)
>
Al Viro Jan. 26, 2021, 10:55 p.m. UTC | #2
On Sun, Jan 24, 2021 at 09:38:19PM -0700, Jens Axboe wrote:
> On 11/15/20 9:45 PM, Dmitry Kadashev wrote:
> > Pass in the struct filename pointers instead of the user string, and
> > update the three callers to do the same. This is heavily based on
> > commit dbea8d345177 ("fs: make do_renameat2() take struct filename").
> > 
> > This behaves like do_unlinkat() and do_renameat2().
> 
> Al, are you OK with this patch? Leaving it quoted, though you should
> have the original too.

> > -static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
> > +long do_mkdirat(int dfd, struct filename *name, umode_t mode)
> >  {
> >  	struct dentry *dentry;
> >  	struct path path;
> >  	int error;
> >  	unsigned int lookup_flags = LOOKUP_DIRECTORY;
> >  
> > +	if (IS_ERR(name))
> > +		return PTR_ERR(name);
> > +
> >  retry:
> > -	dentry = user_path_create(dfd, pathname, &path, lookup_flags);
> > -	if (IS_ERR(dentry))
> > -		return PTR_ERR(dentry);
> > +	name->refcnt++; /* filename_create() drops our ref */
> > +	dentry = filename_create(dfd, name, &path, lookup_flags);
> > +	if (IS_ERR(dentry)) {
> > +		error = PTR_ERR(dentry);
> > +		goto out;
> > +	}

No.  This is going to be a source of confusion from hell.  If anything,
you want a variant of filename_create() that does not drop name on
success.  With filename_create() itself being an inlined wrapper
for it.
Dmitry Kadashev Feb. 1, 2021, 11:09 a.m. UTC | #3
On Wed, Jan 27, 2021 at 5:55 AM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Sun, Jan 24, 2021 at 09:38:19PM -0700, Jens Axboe wrote:
> > On 11/15/20 9:45 PM, Dmitry Kadashev wrote:
> > > Pass in the struct filename pointers instead of the user string, and
> > > update the three callers to do the same. This is heavily based on
> > > commit dbea8d345177 ("fs: make do_renameat2() take struct filename").
> > >
> > > This behaves like do_unlinkat() and do_renameat2().
> >
> > Al, are you OK with this patch? Leaving it quoted, though you should
> > have the original too.
>
> > > -static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
> > > +long do_mkdirat(int dfd, struct filename *name, umode_t mode)
> > >  {
> > >     struct dentry *dentry;
> > >     struct path path;
> > >     int error;
> > >     unsigned int lookup_flags = LOOKUP_DIRECTORY;
> > >
> > > +   if (IS_ERR(name))
> > > +           return PTR_ERR(name);
> > > +
> > >  retry:
> > > -   dentry = user_path_create(dfd, pathname, &path, lookup_flags);
> > > -   if (IS_ERR(dentry))
> > > -           return PTR_ERR(dentry);
> > > +   name->refcnt++; /* filename_create() drops our ref */
> > > +   dentry = filename_create(dfd, name, &path, lookup_flags);
> > > +   if (IS_ERR(dentry)) {
> > > +           error = PTR_ERR(dentry);
> > > +           goto out;
> > > +   }
>
> No.  This is going to be a source of confusion from hell.  If anything,
> you want a variant of filename_create() that does not drop name on
> success.  With filename_create() itself being an inlined wrapper
> for it.

Hi Al,

I think I need more guidance here. First of all, I've based that code on
commit 7cdfa44227b0 ("vfs: Fix refcounting of filenames in fs_parser"), which
does exactly the same refcount bump in fs_parser.c for filename_lookup().  I'm
not saying that's a good excuse to introduce more code like that if it's bad
code, though.

What I _am_ saying is we probably want to make the approaches consistent (at
least eventually), which means we'd need the same "don't drop the name" variant
of filename_lookup()? And given the fact that filename_parentat() (used from
filename_create()) drops the name on error, it looks like we'd need another copy
of it too? Do you think it's really worth it, or would all of these functions
just make things more confusing? (From the looks of it, right now the convention
is that the `struct filename` ownership is always transferred when it is passed
as an arg.)

Also, do you have a good name for such functions that do not drop the name?

And, just for my education, can you explain why the reference counting for
struct filename exists if it's considered bad practice to increase the
reference counter (assuming the cleanup code is correct)?

Thanks.
Al Viro Feb. 1, 2021, 3 p.m. UTC | #4
On Mon, Feb 01, 2021 at 06:09:01PM +0700, Dmitry Kadashev wrote:

> Hi Al,
> 
> I think I need more guidance here. First of all, I've based that code on
> commit 7cdfa44227b0 ("vfs: Fix refcounting of filenames in fs_parser"), which
> does exactly the same refcount bump in fs_parser.c for filename_lookup().  I'm
> not saying it's a good excuse to introduce more code like that if that's a bad
> code though.

It is bad code.  If you look at that function, you'll see that the entire
mess around put_f is rather hard to follow and reason about.  That's a function
with no users, and I'm not sure we want to keep it long-term.

> What I _am_ saying is we probably want to make the approaches consistent (at
> least eventually), which means we'd need the same "don't drop the name" variant
> of filename_lookup?

"don't drop the name on success", similar to what filename_parentat() does.

> And given the fact filename_parentat (used from
> filename_create) drops the name on error it looks like we'd need another copy of
> it too?

No need.

> Do you think it's really worth it or maybe all of these functions will
> make things more confusing? (from the looks of it right now the convention is
> that the `struct filename` ownership is always transferred when it is passed as
> an arg)
> 
> Also, do you have a good name for such functions that do not drop the name?
> 
> And, just for my education, can you explain why the reference counting for
> struct filename exists if it's considered a bad practice to increase the
> reference counter (assuming the cleanup code is correct)?

The last one is the easiest to answer - we want to keep the imported strings
around for audit.  It's not so much a proper refcounting as it is "we might
want freeing delayed" implemented as refcount.

As for do_mkdirat(), you probably want semantics similar to do_unlinkat(), i.e.
have it consume the argument passed to it.  The main complication comes
from ESTALE retries; we want -ESTALE from ->mkdir() itself to trigger "redo
filename_parentat() with LOOKUP_REVAL, then try the rest one more time".
For that you need to keep the filename around.  OK, so you want a variant of
filename_create() that would _not_ consume the filename on success (i.e.
act as filename_parentat() itself does).  Which is trivial to implement -
just rename filename_create() to __filename_create() and remove one of
two putname() in there, leaving just the one in failure exits.  Then
filename_create() itself becomes simply

/*
 * Proposed wrapper that keeps the "consumes @name" behaviour for existing
 * callers: it calls __filename_create() and, when the result is not an
 * error pointer, drops the name with putname().  The result of
 * __filename_create() is returned unchanged.
 *
 * NOTE(review): per the surrounding discussion, __filename_create() is
 * meant to be the current filename_create() with only its failure-path
 * putname() kept, so @name would be dropped on both outcomes here --
 * confirm against the final implementation.
 */
static inline struct dentry *filename_create(int dfd, struct filename *name,
                                struct path *path, unsigned int lookup_flags)
{
	struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
	/* Success path: the wrapper, not the callee, releases the name. */
	if (!IS_ERR(res))
		putname(name);
	return res;
}

and in your do_mkdirat() replacement use
	dentry = __filename_create(dfd, filename, &path, lookup_flags);
instead of
        dentry = user_path_create(dfd, pathname, &path, lookup_flags);
and add
	putname(filename);
in the very end.  All it takes...
Al Viro Feb. 1, 2021, 3:29 p.m. UTC | #5
On Mon, Feb 01, 2021 at 03:00:42PM +0000, Al Viro wrote:

> The last one is the easiest to answer - we want to keep the imported strings
> around for audit.  It's not so much a proper refcounting as it is "we might
> want freeing delayed" implemented as refcount.

BTW, regarding io_uring + audit interactions - just how is that supposed to
work if you offload any work that might lead to audit records (on permission
checks, etc.) to helper threads?
Dmitry Kadashev Feb. 2, 2021, 4:39 a.m. UTC | #6
On Mon, Feb 1, 2021 at 10:00 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Mon, Feb 01, 2021 at 06:09:01PM +0700, Dmitry Kadashev wrote:
>
> > Hi Al,
> >
> > I think I need more guidance here. First of all, I've based that code on
> > commit 7cdfa44227b0 ("vfs: Fix refcounting of filenames in fs_parser"), which
> > does exactly the same refcount bump in fs_parser.c for filename_lookup().  I'm
> > not saying it's a good excuse to introduce more code like that if that's a bad
> > code though.
>
> It is a bad code.  If you look at that function, you'll see that the entire
> mess around put_f is rather hard to follow and reason about.  That's a function
> with no users, and I'm not sure we want to keep it long-term.

But the reason for the put_f mess is the fact that the function accepts either a
string (which it resolves to a struct filename that it then owns) or a struct
filename (that it does not own), not the meddling with the refcount. I'm not
trying to argue that we should do the meddling though, I'm fine with the other
approach.

> > What I _am_ saying is we probably want to make the approaches consistent (at
> > least eventually), which means we'd need the same "don't drop the name" variant
> > of filename_lookup?
>
> "don't drop the name on success", similar to what filename_parentat() does.

OK, that makes things much simpler.

> > And given the fact filename_parentat (used from
> > filename_create) drops the name on error it looks like we'd need another copy of
> > it too?
>
> No need.

OK.

> > Do you think it's really worth it or maybe all of these functions will
> > make things more confusing? (from the looks of it right now the convention is
> > that the `struct filename` ownership is always transferred when it is passed as
> > an arg)
> >
> > Also, do you have a good name for such functions that do not drop the name?
> >
> > And, just for my education, can you explain why the reference counting for
> > struct filename exists if it's considered a bad practice to increase the
> > reference counter (assuming the cleanup code is correct)?
>
> The last one is the easiest to answer - we want to keep the imported strings
> around for audit.  It's not so much a proper refcounting as it is "we might
> want freeing delayed" implemented as refcount.
>
> As for do_mkdirat(), you probably want semantics similar to do_unlinkat(), i.e.
> have it consume the argument passed to it.  The main complication comes
> from ESTALE retries; want -ESTALE from ->mkdir() itself to trigger "redo
> filename_parentat() with LOOKUP_REVAL, then try the rest one more time".
> For which you need to keep filename around.  OK, so you want a variant of
> filename_create() that would _not_ consume the filename on success (i.e.
> act as filename_parentat() itself does).  Which is trivial to implement -
> just rename filename_create() to __filename_create() and remove one of
> two putname() in there, leaving just the one in failure exits.  Then
> filename_create() itself becomes simply
>
> static inline struct dentry *filename_create(int dfd, struct filename *name,
>                                 struct path *path, unsigned int lookup_flags)
> {
>         struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
>         if (!IS_ERR(res))
>                 putname(name);
>         return res;
> }
>
> and in your do_mkdirat() replacement use
>         dentry = __filename_create(dfd, filename, &path, lookup_flags);
> instead of
>         dentry = user_path_create(dfd, pathname, &path, lookup_flags);
> and add
>         putname(filename);
> in the very end.  All it takes...

Yeah, I just was not sure about naming or whether you'd prefer for other
functions to be changed too. You've answered pretty much all my questions and
even more :)

Thanks a lot Al! I'll post v2 soon (since the audit thing you've discovered does
not affect this patch directly).

--
Dmitry Kadashev
Eric W. Biederman March 31, 2021, 4:28 p.m. UTC | #7
Al Viro <viro@zeniv.linux.org.uk> writes:

> On Mon, Feb 01, 2021 at 03:00:42PM +0000, Al Viro wrote:
>
>> The last one is the easiest to answer - we want to keep the imported strings
>> around for audit.  It's not so much a proper refcounting as it is "we might
>> want freeing delayed" implemented as refcount.
>
> BTW, regarding io_uring + audit interactions - just how is that supposed to
> work if you offload any work that might lead to audit records (on permission
> checks, etc.) to helper threads?

For people looking into these details: things have gotten much better
recently.

The big change is that io_uring helper threads are now proper
threads of the process that is using io_uring.  The io_uring helper
threads just happen to never execute any userspace code.

Eric
Al Viro March 31, 2021, 4:46 p.m. UTC | #8
On Wed, Mar 31, 2021 at 11:28:04AM -0500, Eric W. Biederman wrote:
> Al Viro <viro@zeniv.linux.org.uk> writes:
> 
> > On Mon, Feb 01, 2021 at 03:00:42PM +0000, Al Viro wrote:
> >
> >> The last one is the easiest to answer - we want to keep the imported strings
> >> around for audit.  It's not so much a proper refcounting as it is "we might
> >> want freeing delayed" implemented as refcount.
> >
> > BTW, regarding io_uring + audit interactions - just how is that supposed to
> > work if you offload any work that might lead to audit records (on permission
> > checks, etc.) to helper threads?
> 
> For people looking into these details.  Things have gotten much better
> recently.
> 
> The big change is that io_uring helper threads are now proper
> threads of the process that is using io_uring.  The io_uring helper
> threads just happen to never execute any userspace code.

audit context is per-thread (as it has to be, obviously - multiple threads
can have overlapping syscalls), so getname()/putname() interplay with that
is still not obvious.  I agree that these threads have gotten better,
though.
diff mbox series

Patch

diff --git a/fs/internal.h b/fs/internal.h
index 6fd14ea213c3..23b8b427dbd2 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -80,6 +80,7 @@  long do_unlinkat(int dfd, struct filename *name);
 int may_linkat(struct path *link);
 int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
 		 struct filename *newname, unsigned int flags);
+long do_mkdirat(int dfd, struct filename *name, umode_t mode);
 
 /*
  * namespace.c
diff --git a/fs/namei.c b/fs/namei.c
index 03d0e11e4f36..9d26a51f3f54 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3654,17 +3654,23 @@  int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 EXPORT_SYMBOL(vfs_mkdir);
 
-static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
+long do_mkdirat(int dfd, struct filename *name, umode_t mode)
 {
 	struct dentry *dentry;
 	struct path path;
 	int error;
 	unsigned int lookup_flags = LOOKUP_DIRECTORY;
 
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
 retry:
-	dentry = user_path_create(dfd, pathname, &path, lookup_flags);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
+	name->refcnt++; /* filename_create() drops our ref */
+	dentry = filename_create(dfd, name, &path, lookup_flags);
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
+		goto out;
+	}
 
 	if (!IS_POSIXACL(path.dentry->d_inode))
 		mode &= ~current_umask();
@@ -3676,17 +3682,19 @@  static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
 		lookup_flags |= LOOKUP_REVAL;
 		goto retry;
 	}
+out:
+	putname(name);
 	return error;
 }
 
 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 {
-	return do_mkdirat(dfd, pathname, mode);
+	return do_mkdirat(dfd, getname(pathname), mode);
 }
 
 SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
 {
-	return do_mkdirat(AT_FDCWD, pathname, mode);
+	return do_mkdirat(AT_FDCWD, getname(pathname), mode);
 }
 
 int vfs_rmdir(struct inode *dir, struct dentry *dentry)