diff mbox series

[31/34] vfs: syscall: Add fspick() to select a superblock for reconfiguration [ver #12]

Message ID 153754766004.17872.9829232103614083565.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series VFS: Introduce filesystem context [ver #12] | expand

Commit Message

David Howells Sept. 21, 2018, 4:34 p.m. UTC
Provide an fspick() system call that can be used to pick an existing
mountpoint into an fs_context which can thereafter be used to reconfigure a
superblock (equivalent of the superblock side of -o remount).

This looks like:

	int fd = fspick(AT_FDCWD, "/mnt",
			FSPICK_CLOEXEC | FSPICK_NO_AUTOMOUNT);
	fsconfig(fd, FSCONFIG_SET_FLAG, "intr", NULL, 0);
	fsconfig(fd, FSCONFIG_SET_FLAG, "noac", NULL, 0);
	fsconfig(fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);

Once fspick() returns, the file descriptor referring to the filesystem
context is in exactly the same state as one created by fsopen() is in after
fsmount() has been successfully called.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-api@vger.kernel.org
---

 arch/x86/entry/syscalls/syscall_32.tbl |    1 +
 arch/x86/entry/syscalls/syscall_64.tbl |    1 +
 fs/fsopen.c                            |   54 ++++++++++++++++++++++++++++++++
 include/linux/syscalls.h               |    1 +
 include/uapi/linux/fs.h                |    5 +++
 5 files changed, 62 insertions(+)

Comments

Alan Jenkins Oct. 12, 2018, 2:49 p.m. UTC | #1
On 21/09/2018 17:34, David Howells wrote:
> Provide an fspick() system call that can be used to pick an existing
> mountpoint into an fs_context which can thereafter be used to reconfigure a
> superblock (equivalent of the superblock side of -o remount).
>
> This looks like:
>
> 	int fd = fspick(AT_FDCWD, "/mnt",
> 			FSPICK_CLOEXEC | FSPICK_NO_AUTOMOUNT);
> 	fsconfig(fd, FSCONFIG_SET_FLAG, "intr", NULL, 0);
> 	fsconfig(fd, FSCONFIG_SET_FLAG, "noac", NULL, 0);
> 	fsconfig(fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
>
> At the point of fspick being called, the file descriptor referring to the
> filesystem context is in exactly the same state as the one that was created
> by fsopen() after fsmount() has been successfully called.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: linux-api@vger.kernel.org
> ---
>
>   arch/x86/entry/syscalls/syscall_32.tbl |    1 +
>   arch/x86/entry/syscalls/syscall_64.tbl |    1 +
>   fs/fsopen.c                            |   54 ++++++++++++++++++++++++++++++++
>   include/linux/syscalls.h               |    1 +
>   include/uapi/linux/fs.h                |    5 +++
>   5 files changed, 62 insertions(+)
>
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index c78b68256f8a..d1eb6c815790 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -403,3 +403,4 @@
>   389	i386	fsopen			sys_fsopen			__ia32_sys_fsopen
>   390	i386	fsconfig		sys_fsconfig			__ia32_sys_fsconfig
>   391	i386	fsmount			sys_fsmount			__ia32_sys_fsmount
> +392	i386	fspick			sys_fspick			__ia32_sys_fspick
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index d44ead5d4368..d3ab703c02bb 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -348,6 +348,7 @@
>   337	common	fsopen			__x64_sys_fsopen
>   338	common	fsconfig		__x64_sys_fsconfig
>   339	common	fsmount			__x64_sys_fsmount
> +340	common	fspick			__x64_sys_fspick
>   
>   #
>   # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/fs/fsopen.c b/fs/fsopen.c
> index 5955a6b65596..9ead9220e2cb 100644
> --- a/fs/fsopen.c
> +++ b/fs/fsopen.c
> @@ -155,6 +155,60 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
>   	return ret;
>   }
>   
> +/*
> + * Pick a superblock into a context for reconfiguration.
> + */
> +SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
> +{
> +	struct fs_context *fc;
> +	struct path target;
> +	unsigned int lookup_flags;
> +	int ret;
> +
> +	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
> +		return -EPERM;


This seems to accept basically any mount.  Specifically: are you sure 
it's OK to return a handle to a SB_NOUSER superblock?

# strace -f -v -e trace=154 \
     ./fspick 3</proc/self/ns/mnt 3 \
     stat -f /dev/fd/3

syscall_0x154(0x3, 0x4009a1, 0x8, ...) = 0x4
   File: "/dev/fd/3"
     ID: 0        Namelen: 255     Type: anon-inode FS
Block size: 4096       Fundamental block size: 4096
Blocks: Total: 0          Free: 0          Available: 0
Inodes: Total: 0          Free: 0
+++ exited with 0 +++
Al Viro Oct. 13, 2018, 6:11 a.m. UTC | #2
On Fri, Oct 12, 2018 at 03:49:50PM +0100, Alan Jenkins wrote:
> > +SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
> > +{
> > +	struct fs_context *fc;
> > +	struct path target;
> > +	unsigned int lookup_flags;
> > +	int ret;
> > +
> > +	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
> > +		return -EPERM;
> 
> 
> This seems to accept basically any mount.  Specifically: are you sure it's
> OK to return a handle to a SB_NO_USER superblock?

Umm...  As long as we don't try to do pathname resolution from its ->s_root,
shouldn't be a problem and I don't see anything that would do that.  I might've
missed something, but...
Alan Jenkins Oct. 13, 2018, 9:45 a.m. UTC | #3
On 13/10/2018 07:11, Al Viro wrote:
> On Fri, Oct 12, 2018 at 03:49:50PM +0100, Alan Jenkins wrote:
>>> +SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
>>> +{
>>> +	struct fs_context *fc;
>>> +	struct path target;
>>> +	unsigned int lookup_flags;
>>> +	int ret;
>>> +
>>> +	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
>>> +		return -EPERM;
>>
>> This seems to accept basically any mount.  Specifically: are you sure it's
>> OK to return a handle to a SB_NO_USER superblock?
> Umm...  As long as we don't try to do pathname resolution from its ->s_root,
> shouldn't be a problem and I don't see anything that would do that.  I might've
> missed something, but...

Sorry, I guess SB_NOUSER was the wrong word.  I was trying to find out
whether anything stopped things like

int memfd = memfd_create("foo", 0);
int fsfd = fspick(memfd, "", FSPICK_EMPTY_PATH);

fsconfig(fsfd, FSCONFIG_SET_FLAG, "ro", NULL, 0);
fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "100M", 0);
fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);

So far I'm getting -EBUSY if I try to apply the "ro", -EINVAL if I try 
to apply the "size=100M".  But if I don't apply either, then 
FSCONFIG_CMD_RECONFIGURE succeeds.

It seems worrying that it might let me set options on shm_mnt. Or at 
least letting me get as far as the -EBUSY check for the "ro" superblock 
flag.

I'm not sure why I'm getting the -EINVAL setting the "size" option.  But 
it would be much more reassuring if I was getting -EPERM :-).

Alan
Andy Lutomirski Oct. 13, 2018, 11:04 p.m. UTC | #4
On Sat, Oct 13, 2018 at 2:45 AM Alan Jenkins
<alan.christopher.jenkins@gmail.com> wrote:
>
> On 13/10/2018 07:11, Al Viro wrote:
> > On Fri, Oct 12, 2018 at 03:49:50PM +0100, Alan Jenkins wrote:
> >>> +SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
> >>> +{
> >>> +   struct fs_context *fc;
> >>> +   struct path target;
> >>> +   unsigned int lookup_flags;
> >>> +   int ret;
> >>> +
> >>> +   if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
> >>> +           return -EPERM;
> >>
> >> This seems to accept basically any mount.  Specifically: are you sure it's
> >> OK to return a handle to a SB_NO_USER superblock?
> > Umm...  As long as we don't try to do pathname resolution from its ->s_root,
> > shouldn't be a problem and I don't see anything that would do that.  I might've
> > missed something, but...
>
> Sorry, I guess SB_NOUSER was the wrong word.  I was trying find if
> anything stopped things like
>
> int memfd = memfd_create("foo", 0);
> int fsfd = fspick(memfd, "", FSPICK_EMPTY_PATH);
>
> fsconfig(fsfd, FSCONFIG_SET_FLAG, "ro", NULL, 0);
> fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "100M", 0);
> fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
>
> So far I'm getting -EBUSY if I try to apply the "ro", -EINVAL if I try
> to apply the "size=100M".  But if I don't apply either, then
> FSCONFIG_CMD_RECONFIGURE succeeds.
>
> It seems worrying that it might let me set options on shm_mnt. Or at
> least letting me get as far as the -EBUSY check for the "ro" superblock
> flag.
>
> I'm not sure why I'm getting the -EINVAL setting the "size" option.  But
> it would be much more reassuring if I was getting -EPERM :-).
>

I would argue that the filesystem associated with a memfd, and even
the fact that there *is* a filesystem, is none of user code's
business.  So that fspick() call should return -EINVAL or similar.
David Howells Oct. 17, 2018, 1:15 p.m. UTC | #5
Alan Jenkins <alan.christopher.jenkins@gmail.com> wrote:

> Sorry, I guess SB_NOUSER was the wrong word.  I was trying find if anything
> stopped things like
> 
> int memfd = memfd_create("foo", 0);
> int fsfd = fspick(memfd, "", FSPICK_EMPTY_PATH);
> 
> fsconfig(fsfd, FSCONFIG_SET_FLAG, "ro", NULL, 0);
> fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "100M", 0);
> fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
> 
> So far I'm getting -EBUSY if I try to apply the "ro", -EINVAL if I try to
> apply the "size=100M".  But if I don't apply either, then
> FSCONFIG_CMD_RECONFIGURE succeeds.

I should probably check that the picked point is actually a mountpoint.

David
David Howells Oct. 17, 2018, 1:20 p.m. UTC | #6
David Howells <dhowells@redhat.com> wrote:

> I should probably check that the picked point is actually a mountpoint.

The root of the mount object at the path specified, that is, perhaps with
something like the attached.

David
---
diff --git a/fs/fsopen.c b/fs/fsopen.c
index f673e93ac456..aaaaa17a233c 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -186,6 +186,10 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
 	if (ret < 0)
 		goto err;
 
+	ret = -EINVAL;
+	if (target.mnt->mnt_root != target.dentry)
+		goto err_path;
+
 	fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
 				0, 0, FS_CONTEXT_FOR_RECONFIGURE);
 	if (IS_ERR(fc)) {
Alan Jenkins Oct. 17, 2018, 2:31 p.m. UTC | #7
On 17/10/2018 14:20, David Howells wrote:
> David Howells <dhowells@redhat.com> wrote:
>
>> I should probably check that the picked point is actually a mountpoint.
> The root of the mount object at the path specified, that is, perhaps with
> something like the attached.
>
> David


I agree.  I'm happy to see this is using the same check as do_remount().


* change filesystem flags. dir should be a physical root of filesystem.
* If you've mounted a non-root directory somewhere and want to do remount
* on it - tough luck.
*/


Thanks

Alan


> ---
> diff --git a/fs/fsopen.c b/fs/fsopen.c
> index f673e93ac456..aaaaa17a233c 100644
> --- a/fs/fsopen.c
> +++ b/fs/fsopen.c
> @@ -186,6 +186,10 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
>   	if (ret < 0)
>   		goto err;
>   
> +	ret = -EINVAL;
> +	if (target.mnt->mnt_root != target.dentry)
> +		goto err_path;
> +
>   	fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
>   				0, 0, FS_CONTEXT_FOR_RECONFIGURE);
>   	if (IS_ERR(fc)) {
>
Eric W. Biederman Oct. 17, 2018, 2:35 p.m. UTC | #8
Alan Jenkins <alan.christopher.jenkins@gmail.com> writes:

> On 17/10/2018 14:20, David Howells wrote:
>> David Howells <dhowells@redhat.com> wrote:
>>
>>> I should probably check that the picked point is actually a mountpoint.
>> The root of the mount object at the path specified, that is, perhaps with
>> something like the attached.
>>
>> David
>
>
> I agree.  I'm happy to see this is using the same check as do_remount().
>
>
> * change filesystem flags. dir should be a physical root of filesystem.
> * If you've mounted a non-root directory somewhere and want to do remount
> * on it - tough luck.
> */

David's check will work for bind mounts as well.  It just won't work
for files or subdirectories of some mountpoint.

Eric

>> ---
>> diff --git a/fs/fsopen.c b/fs/fsopen.c
>> index f673e93ac456..aaaaa17a233c 100644
>> --- a/fs/fsopen.c
>> +++ b/fs/fsopen.c
>> @@ -186,6 +186,10 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
>>   	if (ret < 0)
>>   		goto err;
>>   +	ret = -EINVAL;
>> +	if (target.mnt->mnt_root != target.dentry)
>> +		goto err_path;
>> +
>>   	fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
>>   				0, 0, FS_CONTEXT_FOR_RECONFIGURE);
>>   	if (IS_ERR(fc)) {
>>
Alan Jenkins Oct. 17, 2018, 2:55 p.m. UTC | #9
On 17/10/2018 15:35, Eric W. Biederman wrote:
> Alan Jenkins <alan.christopher.jenkins@gmail.com> writes:
>
>> On 17/10/2018 14:20, David Howells wrote:
>>> David Howells <dhowells@redhat.com> wrote:
>>>
>>>> I should probably check that the picked point is actually a mountpoint.
>>> The root of the mount object at the path specified, that is, perhaps with
>>> something like the attached.
>>>
>>> David
>>
>> I agree.  I'm happy to see this is using the same check as do_remount().
>>
>>
>> * change filesystem flags. dir should be a physical root of filesystem.
>> * If you've mounted a non-root directory somewhere and want to do remount
>> * on it - tough luck.
>> */
> David's check will work for bind mounts as well.  It just won't work
> for files or subdirectories of some mountpoint.
>
> Eric


I see.  Then I am still happy to see the fspick() check match a check in 
do_remount() (and it still solves the problem I was worried about).

I cannot blame David for the do_remount() comment being out of date :-).

# uname -r
4.18.10-200.fc.28.x86_64
# mount --bind /mnt /mnt
# mount -oremount,debug /mnt
# findmnt /mnt; findmnt /
[findmnt shows / has been remounted, adding the ext4 "debug" mount option]


>
>>> ---
>>> diff --git a/fs/fsopen.c b/fs/fsopen.c
>>> index f673e93ac456..aaaaa17a233c 100644
>>> --- a/fs/fsopen.c
>>> +++ b/fs/fsopen.c
>>> @@ -186,6 +186,10 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
>>>    	if (ret < 0)
>>>    		goto err;
>>>    +	ret = -EINVAL;
>>> +	if (target.mnt->mnt_root != target.dentry)
>>> +		goto err_path;
>>> +
>>>    	fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
>>>    				0, 0, FS_CONTEXT_FOR_RECONFIGURE);
>>>    	if (IS_ERR(fc)) {
>>>
David Howells Oct. 17, 2018, 3:24 p.m. UTC | #10
Eric W. Biederman <ebiederm@xmission.com> wrote:

> David's check will work for bind mounts as well.  It just won't work
> for files or subdirectories of some mountpoint.

Bind mounts have to be done with open_tree() and move_mount().  You can't now
do fsmount() on something fspick()'d.

David
Eric W. Biederman Oct. 17, 2018, 3:38 p.m. UTC | #11
David Howells <dhowells@redhat.com> writes:

> Eric W. Biederman <ebiederm@xmission.com> wrote:
>
>> David's check will work for bind mounts as well.  It just won't work
>> for files or subdirectories of some mountpoint.
>
> Bind mounts have to be done with open_tree() and move_mount().  You can't now
> do fsmount() on something fspick()'d.

But a bind mount will have mnt_root set to the dentry that was
bound.

Therefore fspick as you are proposing modifying will work for the root
of bind mounts, as well as the root of regular mounts.  My apologies for
not being clear.

Eric
David Howells Oct. 17, 2018, 3:45 p.m. UTC | #12
Alan Jenkins <alan.christopher.jenkins@gmail.com> wrote:

> I agree.  I'm happy to see this is using the same check as do_remount().
> 
> 
> * change filesystem flags. dir should be a physical root of filesystem.
> * If you've mounted a non-root directory somewhere and want to do remount
> * on it - tough luck.
> */

Are you suggesting that it should only work at the ultimate root of a
superblock and not a bind mount somewhere within?

That's tricky to make work for NFS because s_root is a dummy dentry.

David
Alan Jenkins Oct. 17, 2018, 5:41 p.m. UTC | #13
On 17/10/2018 16:45, David Howells wrote:
> Alan Jenkins <alan.christopher.jenkins@gmail.com> wrote:
>
>> I agree.  I'm happy to see this is using the same check as do_remount().
>>
>>
>> * change filesystem flags. dir should be a physical root of filesystem.
>> * If you've mounted a non-root directory somewhere and want to do remount
>> * on it - tough luck.
>> */
> Are you suggesting that it should only work at the ultimate root of a
> superblock and not a bind mount somewhere within?
>
> That's tricky to make work for NFS because s_root is a dummy dentry.
>
> David


Retro-actively: I do not suggest that.

I tried to answer this question in my reply to Eric correcting me.  Eric 
was right to correct me.  I now understand the comment above 
do_remount() is incorrect.  I re-reviewed your diff in light of that.  I 
re-endorse your diff as a way to solve the problem I raised.

(I think it would be useful to remove the misleading comment above 
do_remount(), to avoid future confusion.)

> @@ -186,6 +186,10 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags

>  	if (ret < 0)
>  		goto err;
>  
> +	ret = -EINVAL;
> +	if (target.mnt->mnt_root != target.dentry)
> +		goto err_path;
> +

( the "if" statement it adds to fspick() is equivalent to the second 
"if" statement in do_remount(): )

static  int  do_remount <https://elixir.bootlin.com/linux/v4.18/ident/do_remount>(struct  path <https://elixir.bootlin.com/linux/v4.18/ident/path>  *path <https://elixir.bootlin.com/linux/v4.18/ident/path>,  int  ms_flags,  int  sb_flags,
		int  mnt_flags,  void  *data)
{
	int  err;
	struct  super_block <https://elixir.bootlin.com/linux/v4.18/ident/super_block>  *sb  =  path <https://elixir.bootlin.com/linux/v4.18/ident/path>->mnt 
<https://elixir.bootlin.com/linux/v4.18/ident/mnt>->mnt_sb;
	struct  mount <https://elixir.bootlin.com/linux/v4.18/ident/mount>  *mnt <https://elixir.bootlin.com/linux/v4.18/ident/mnt>  =  real_mount 
<https://elixir.bootlin.com/linux/v4.18/ident/real_mount>(path 
<https://elixir.bootlin.com/linux/v4.18/ident/path>->mnt 
<https://elixir.bootlin.com/linux/v4.18/ident/mnt>);

	if  (!check_mnt <https://elixir.bootlin.com/linux/v4.18/ident/check_mnt>(mnt 
<https://elixir.bootlin.com/linux/v4.18/ident/mnt>))
		return  -EINVAL <https://elixir.bootlin.com/linux/v4.18/ident/EINVAL>;

	if  (path <https://elixir.bootlin.com/linux/v4.18/ident/path>->dentry  !=  path <https://elixir.bootlin.com/linux/v4.18/ident/path>->mnt 
<https://elixir.bootlin.com/linux/v4.18/ident/mnt>->mnt_root)
		return  -EINVAL <https://elixir.bootlin.com/linux/v4.18/ident/EINVAL>;

Thanks

Alan
David Howells Oct. 17, 2018, 9:20 p.m. UTC | #14
Alan Jenkins <alan.christopher.jenkins@gmail.com> wrote:

> static  int  do_remount <https://elixir.bootlin.com/linux/v4.18/ident/do_remount>(struct  path <https://elixir.bootlin.com/linux/v4.18/ident/path>  *path <https://elixir.bootlin.com/linux/v4.18/ident/path>,  int  ms_flags,  int  sb_flags,
> 		int  mnt_flags,  void  *data)

What happened there?  You seem to have had a load of URLs substituted in.

David
Alan Jenkins Oct. 17, 2018, 10:13 p.m. UTC | #15
[resent, hopefully with slightly less formatting damage]

On 17/10/2018 16:45, David Howells wrote:

> Alan Jenkins <alan.christopher.jenkins@gmail.com> wrote:
>
>> I agree. I'm happy to see this is using the same check as do_remount().
>>
>>
>> * change filesystem flags. dir should be a physical root of filesystem.
>> * If you've mounted a non-root directory somewhere and want to do remount
>> * on it - tough luck.
>> */
> Are you suggesting that it should only work at the ultimate root of a
> superblock and not a bind mount somewhere within?
>
> That's tricky to make work for NFS because s_root is a dummy dentry.
>
> David


Retro-actively: I do not suggest that.

I tried to answer this question in my reply to Eric correcting me. Eric 
was right to correct me.  I now understand the comment above 
do_remount() is incorrect.  I re-reviewed your diff in light of that.  I 
re-endorse your diff as a way to solve the problem I raised.

(I think it would be useful to remove the misleading comment above 
do_remount(), to avoid future confusion.)


> @@ -186,6 +186,10 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
>   	if (ret < 0)
>   		goto err;
>   
> +	ret = -EINVAL;
> +	if (target.mnt->mnt_root != target.dentry)
> +		goto err_path;
> +
>   	fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
>   				0, 0, FS_CONTEXT_FOR_RECONFIGURE);
>   	if (IS_ERR(fc)) {


( the "if" statement it adds to fspick() is equivalent to the second 
"if" statement in do_remount(): )

static int do_remount(struct path *path, int ms_flags, int sb_flags,
		      int mnt_flags, void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

Thanks

Alan
diff mbox series

Patch

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c78b68256f8a..d1eb6c815790 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -403,3 +403,4 @@ 
 389	i386	fsopen			sys_fsopen			__ia32_sys_fsopen
 390	i386	fsconfig		sys_fsconfig			__ia32_sys_fsconfig
 391	i386	fsmount			sys_fsmount			__ia32_sys_fsmount
+392	i386	fspick			sys_fspick			__ia32_sys_fspick
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index d44ead5d4368..d3ab703c02bb 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -348,6 +348,7 @@ 
 337	common	fsopen			__x64_sys_fsopen
 338	common	fsconfig		__x64_sys_fsconfig
 339	common	fsmount			__x64_sys_fsmount
+340	common	fspick			__x64_sys_fspick
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 5955a6b65596..9ead9220e2cb 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -155,6 +155,60 @@  SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
 	return ret;
 }
 
+/*
+ * Pick a superblock into a context for reconfiguration.
+ */
+SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
+{
+	struct fs_context *fc;
+	struct path target;
+	unsigned int lookup_flags;
+	int ret;
+
+	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if ((flags & ~(FSPICK_CLOEXEC |
+		       FSPICK_SYMLINK_NOFOLLOW |
+		       FSPICK_NO_AUTOMOUNT |
+		       FSPICK_EMPTY_PATH)) != 0)
+		return -EINVAL;
+
+	lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
+	if (flags & FSPICK_SYMLINK_NOFOLLOW)
+		lookup_flags &= ~LOOKUP_FOLLOW;
+	if (flags & FSPICK_NO_AUTOMOUNT)
+		lookup_flags &= ~LOOKUP_AUTOMOUNT;
+	if (flags & FSPICK_EMPTY_PATH)
+		lookup_flags |= LOOKUP_EMPTY;
+	ret = user_path_at(dfd, path, lookup_flags, &target);
+	if (ret < 0)
+		goto err;
+
+	fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
+				0, 0, FS_CONTEXT_FOR_RECONFIGURE);
+	if (IS_ERR(fc)) {
+		ret = PTR_ERR(fc);
+		goto err_path;
+	}
+
+	fc->phase = FS_CONTEXT_RECONF_PARAMS;
+
+	ret = fscontext_alloc_log(fc);
+	if (ret < 0)
+		goto err_fc;
+
+	path_put(&target);
+	return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
+
+err_fc:
+	put_fs_context(fc);
+err_path:
+	path_put(&target);
+err:
+	return ret;
+}
+
 /*
  * Check the state and apply the configuration.  Note that this function is
  * allowed to 'steal' the value by setting param->xxx to NULL before returning.
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4697fad47789..eb8d62f4ee24 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -914,6 +914,7 @@  asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags);
 asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key,
 			     const void __user *value, int aux);
 asmlinkage long sys_fsmount(int fs_fd, unsigned int flags, unsigned int ms_flags);
+asmlinkage long sys_fspick(int dfd, const char __user *path, unsigned int flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 10281d582e28..7f01503a9e9b 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -351,6 +351,11 @@  typedef int __bitwise __kernel_rwf_t;
 
 #define FSMOUNT_CLOEXEC		0x00000001
 
+#define FSPICK_CLOEXEC		0x00000001
+#define FSPICK_SYMLINK_NOFOLLOW	0x00000002
+#define FSPICK_NO_AUTOMOUNT	0x00000004
+#define FSPICK_EMPTY_PATH	0x00000008
+
 /*
  * The type of fsconfig() call made.
  */