diff mbox series

[14/33] vfs: Implement a filesystem superblock creation/configuration context [ver #11]

Message ID 153313714181.13253.304098108512966976.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series VFS: Introduce filesystem context [ver #11] | expand

Commit Message

David Howells Aug. 1, 2018, 3:25 p.m. UTC
Implement a filesystem context concept to be used during superblock
creation for mount and superblock reconfiguration for remount.

The mounting procedure then becomes:

 (1) Allocate new fs_context context.

 (2) Configure the context.

 (3) Create superblock.

 (4) Query the superblock.

 (5) Create a mount for the superblock.

 (6) Destroy the context.

Rather than calling fs_type->mount(), an fs_context struct is created and
fs_type->init_fs_context() is called to set it up.  Pointers exist for the
filesystem and LSM to hang their private data off.

A set of operations has to be set by ->init_fs_context() to provide
freeing, duplication, option parsing, binary data parsing, validation,
mounting and superblock filling.

Legacy filesystems are supported by the provision of a set of legacy
fs_context operations that build up a list of mount options and then invoke
fs_type->mount() from within the fs_context ->get_tree() operation.  This
allows all filesystems to be accessed using fs_context.

It should be noted that, whilst this patch adds a lot of lines of code,
there is quite a bit of duplication with existing code that can be
eliminated should all filesystems be converted over.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/Makefile                |    2 
 fs/filesystems.c           |    4 
 fs/fs_context.c            |  668 ++++++++++++++++++++++++++++++++++++++++++++
 fs/internal.h              |    8 -
 fs/libfs.c                 |   19 +
 fs/namespace.c             |  351 +++++++++++++++--------
 fs/super.c                 |  303 +++++++++++++++++++-
 include/linux/fs.h         |   17 +
 include/linux/fs_context.h |   45 +++
 include/linux/mount.h      |    3 
 10 files changed, 1278 insertions(+), 142 deletions(-)
 create mode 100644 fs/fs_context.c

Comments

Guenter Roeck Sept. 11, 2018, 5:46 p.m. UTC | #1
On Wed, Aug 01, 2018 at 04:25:41PM +0100, David Howells wrote:
> Implement a filesystem context concept to be used during superblock
> creation for mount and superblock reconfiguration for remount.
> 
> The mounting procedure then becomes:
> 
>  (1) Allocate new fs_context context.
> 
>  (2) Configure the context.
> 
>  (3) Create superblock.
> 
>  (4) Query the superblock.
> 
>  (5) Create a mount for the superblock.
> 
>  (6) Destroy the context.
> 
> Rather than calling fs_type->mount(), an fs_context struct is created and
> fs_type->init_fs_context() is called to set it up.  Pointers exist for the
> filesystem and LSM to hang their private data off.
> 
> A set of operations has to be set by ->init_fs_context() to provide
> freeing, duplication, option parsing, binary data parsing, validation,
> mounting and superblock filling.
> 
> Legacy filesystems are supported by the provision of a set of legacy
> fs_context operations that build up a list of mount options and then invoke
> fs_type->mount() from within the fs_context ->get_tree() operation.  This
> allows all filesystems to be accessed using fs_context.
> 
> It should be noted that, whilst this patch adds a lot of lines of code,
> there is quite a bit of duplication with existing code that can be
> eliminated should all filesystems be converted over.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>

I don't find a more recent version of this patch in patchwork on kernel.org,
so I am replying to this one. My apologies if there are more recent versions.

This patch is causing widespread crashes in next-20180910 and next-20180911.
Example and bisect log (for x86_64) attached.

Guenter

---
Rebooting.
[    4.894299] random: dd: uninitialized urandom read (512 bytes read)
[    5.055206] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
[    5.055518] PGD 800000000c025067 P4D 800000000c025067 PUD c7a3067 PMD 0 
[    5.055941] Oops: 0000 [#1] SMP PTI
[    5.056191] CPU: 0 PID: 1208 Comm: umount Not tainted 4.19.0-rc3-next-20180911 #1
[    5.056367] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014
[    5.057003] RIP: 0010:reconfigure_super+0x47/0x210
[    5.057214] Code: d4 01 00 00 44 8b a3 30 02 00 00 45 85 e4 0f 85 9d 01 00 00 a8 01 48 89 fd 75 4f 48 89 df 45 31 ed e8 ad 4f 01 00 48 8b 45 00 <48> 8b 40 30 48 85 c0 0f 84 d3 00 00 00 48 89 ef ff d0 85 c0 0f 84
[    5.057573] RSP: 0018:ffffa8560011bdd0 EFLAGS: 00000246
[    5.057709] RAX: 0000000000000000 RBX: ffffa2cd4c72c000 RCX: ffffa2cd4c72c0b8
[    5.057850] RDX: ffffa2cd4c72c048 RSI: 0000000000000000 RDI: ffffffff98b49e28
[    5.057991] RBP: ffffa8560011be00 R08: 000000000000019b R09: 0000000000000000
[    5.058132] R10: ffffa856000bfd08 R11: 0000000000000001 R12: 0000000000000000
[    5.058274] R13: 0000000000000001 R14: ffffa2cd4f38f920 R15: 0000000000000000
[    5.058451] FS:  00007fa2f1a15500(0000) GS:ffffa2cd4f600000(0000) knlGS:0000000000000000
[    5.059178] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.059302] CR2: 0000000000000030 CR3: 000000000c7b4000 CR4: 00000000003406f0
[    5.059496] Call Trace:
[    5.060118]  do_umount_root+0x7b/0xb0
[    5.060244]  ksys_umount+0x250/0x3e0
[    5.060345]  ? vfs_write+0x13f/0x190
[    5.060439]  __x64_sys_umount+0xd/0x10
[    5.060537]  do_syscall_64+0x39/0xe0
[    5.060635]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[    5.060874] RIP: 0033:0x7fa2f1534b47
[    5.060970] Code: 73 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 31 f6 e9 09 00 00 00 66 0f 1f 84 00 00 00 00 00 b8 a6 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 11 73 2b 00 f7 d8 64 89 01 48
[    5.061311] RSP: 002b:00007ffeb7926e68 EFLAGS: 00000206 ORIG_RAX: 00000000000000a6
[    5.061478] RAX: ffffffffffffffda RBX: 0000000000d078e0 RCX: 00007fa2f1534b47
[    5.061612] RDX: 00007ffeb7927050 RSI: 0000000000000000 RDI: 0000000000d078e0
[    5.061744] RBP: 0000000000d07b40 R08: 0000000000d07920 R09: 00007fa2f15726c0
[    5.061875] R10: 000000000000089e R11: 0000000000000206 R12: 0000000000d078a0
[    5.062006] R13: 0000000000d07ba0 R14: 0000000000000000 R15: 00007ffeb7927050
[    5.062188] Modules linked in:
[    5.062375] CR2: 0000000000000030
[    5.062675] ---[ end trace c42a74534e5e2f3f ]---
[    5.062816] RIP: 0010:reconfigure_super+0x47/0x210
[    5.062924] Code: d4 01 00 00 44 8b a3 30 02 00 00 45 85 e4 0f 85 9d 01 00 00 a8 01 48 89 fd 75 4f 48 89 df 45 31 ed e8 ad 4f 01 00 48 8b 45 00 <48> 8b 40 30 48 85 c0 0f 84 d3 00 00 00 48 89 ef ff d0 85 c0 0f 84
[    5.063263] RSP: 0018:ffffa8560011bdd0 EFLAGS: 00000246
[    5.063372] RAX: 0000000000000000 RBX: ffffa2cd4c72c000 RCX: ffffa2cd4c72c0b8
[    5.063526] RDX: ffffa2cd4c72c048 RSI: 0000000000000000 RDI: ffffffff98b49e28
[    5.063665] RBP: ffffa8560011be00 R08: 000000000000019b R09: 0000000000000000
[    5.063797] R10: ffffa856000bfd08 R11: 0000000000000001 R12: 0000000000000000
[    5.063928] R13: 0000000000000001 R14: ffffa2cd4f38f920 R15: 0000000000000000
[    5.064060] FS:  00007fa2f1a15500(0000) GS:ffffa2cd4f600000(0000) knlGS:0000000000000000
[    5.064224] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    5.064337] CR2: 0000000000000030 CR3: 000000000c7b4000 CR4: 00000000003406f0

---

# bad: [09c0888767529cdb382f34452819e42d1a66a114] Add linux-next specific files for 20180911
# good: [11da3a7f84f19c26da6f86af878298694ede0804] Linux 4.19-rc3
git bisect start 'HEAD' 'v4.19-rc3'
# bad: [a2ebc71cf97bed9b453318418e4a281434565e8b] Merge remote-tracking branch 'nfc-next/master'
git bisect bad a2ebc71cf97bed9b453318418e4a281434565e8b
# good: [6fde463b32bf4105c28c0a297a5b66aca5d6ecd4] Merge remote-tracking branch 's390/features'
git bisect good 6fde463b32bf4105c28c0a297a5b66aca5d6ecd4
# bad: [136fd6d530a3ae0dd003984f683345cfe88c01f3] Merge remote-tracking branch 'v4l-dvb/master'
git bisect bad 136fd6d530a3ae0dd003984f683345cfe88c01f3
# good: [c7ae95368af43c08f5f615b00f2f7bf2e9c45788] Merge remote-tracking branch 'v9fs/9p-next'
git bisect good c7ae95368af43c08f5f615b00f2f7bf2e9c45788
# good: [4c640c41381e47b328c6507bcf534812761256cd] Merge branch 'for-4.19/fixes' into for-next
git bisect good 4c640c41381e47b328c6507bcf534812761256cd
# bad: [5bc91f70c5ecc2bc5967b98ce7fa4e55ad230d99] Merge remote-tracking branch 'hid/for-next'
git bisect bad 5bc91f70c5ecc2bc5967b98ce7fa4e55ad230d99
# bad: [88abb54c46648cf25930133fbdeb145bf8537673] vfs: syscall: Add fsconfig() for configuring and managing a context
git bisect bad 88abb54c46648cf25930133fbdeb145bf8537673
# good: [b2bbd433151748c5268769f560c926343dece319] vfs: Separate changing mount flags full remount
git bisect good b2bbd433151748c5268769f560c926343dece319
# bad: [b348b6230aac28ffe555000831966d45529ab3b0] kernfs, sysfs, cgroup, intel_rdt: Support fs_context
git bisect bad b348b6230aac28ffe555000831966d45529ab3b0
# bad: [bf090f3c0282903ad55bca27a482180c70627bd5] procfs: Move proc_fill_super() to fs/proc/root.c
git bisect bad bf090f3c0282903ad55bca27a482180c70627bd5
# bad: [d3f3eaba540acf5b521865dec5634e3a1e138f1d] vfs: Remove unused code after filesystem context changes
git bisect bad d3f3eaba540acf5b521865dec5634e3a1e138f1d
# bad: [5d5eb529715b5a7a4caf10825e2a330608dcd1ef] vfs: Implement a filesystem superblock creation/configuration context
git bisect bad 5d5eb529715b5a7a4caf10825e2a330608dcd1ef
# first bad commit: [5d5eb529715b5a7a4caf10825e2a330608dcd1ef] vfs: Implement a filesystem superblock creation/configuration context
David Howells Sept. 11, 2018, 9:52 p.m. UTC | #2
> [    5.057003] RIP: 0010:reconfigure_super+0x47/0x210

Can you tell me what file and line this is?

Also, do you know which filesystem was involved?

> I don't find a more recent version of this patch in patchwork on kernel.org,
> so I am replying to this one. My apologies if there are more recent versions.

I've just updated my tree with some fixes.  Can you try:

https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/log/

branch "fsinfo"?

Thanks,
David
Guenter Roeck Sept. 11, 2018, 10:07 p.m. UTC | #3
On Tue, Sep 11, 2018 at 10:52:26PM +0100, David Howells wrote:
> > [    5.057003] RIP: 0010:reconfigure_super+0x47/0x210
> 
> Can you tell me what file and line this is?
> 
> Also, do you know which filesystem was involved?
> 
> > I don't find a more recent version of this patch in patchwork on kernel.org,
> > so I am replying to this one. My apologies if there are more recent versions.
> 
> I've just updated my tree with some fixes.  Can you try:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/log/
> 
> branch "fsinfo"?
> 

Unfortunately, that does not work either.
With v4.19-rc3-40-g09f0a401de37:

[    8.505130] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
[    8.506237] PGD 800000001d81e067 P4D 800000001d81e067 PUD 1dfb1067 PMD 0 
[    8.506669] Oops: 0000 [#1] SMP PTI
[    8.506915] CPU: 0 PID: 1180 Comm: umount Not tainted 4.19.0-rc3+ #1
[    8.507052] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014
[    8.507672] RIP: 0010:reconfigure_super+0x47/0x210
[    8.507877] Code: d4 01 00 00 44 8b a3 30 02 00 00 45 85 e4 0f 85 9d 01 00 00 a8 01 48 89 fd 75 4f 48 89 df 45 31 ed e8 ad 4f 01 00 48 8b 45 00 <48> 4
[    8.508222] RSP: 0018:ffffb3794015bdd0 EFLAGS: 00000246
[    8.508345] RAX: 0000000000000000 RBX: ffff9d855df27800 RCX: ffff9d855df278b8
[    8.508479] RDX: ffff9d855df27848 RSI: 0000000000000000 RDI: ffffffff8d54a9a8
[    8.508617] RBP: ffffb3794015be00 R08: 00000000000000d8 R09: 0000000000000000
[    8.508752] R10: ffffb3794011fce8 R11: 0000000000000001 R12: 0000000000000000
[    8.508885] R13: 0000000000000001 R14: ffff9d855effc920 R15: 0000000000000000
[    8.509056] FS:  00007f6712967500(0000) GS:ffff9d855f200000(0000) knlGS:0000000000000000
[    8.509217] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    8.509332] CR2: 0000000000000030 CR3: 000000001dfe0000 CR4: 00000000000006f0
[    8.509516] Call Trace:
[    8.510121]  do_umount_root+0x7b/0xb0
[    8.510244]  ksys_umount+0x250/0x3e0
[    8.510535]  ? vfs_write+0x13f/0x190
[    8.510629]  __x64_sys_umount+0xd/0x10
[    8.510722]  do_syscall_64+0x39/0xe0
[    8.510810]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

Guenter
David Howells Sept. 11, 2018, 11:17 p.m. UTC | #4
Guenter Roeck <linux@roeck-us.net> wrote:

> [    8.507672] RIP: 0010:reconfigure_super+0x47/0x210

Can you tell me the file and line this corresponds to?

Thanks,
David
Guenter Roeck Sept. 11, 2018, 11:54 p.m. UTC | #5
On Wed, Sep 12, 2018 at 12:17:35AM +0100, David Howells wrote:
> Guenter Roeck <linux@roeck-us.net> wrote:
> 
> > [    8.507672] RIP: 0010:reconfigure_super+0x47/0x210
> 
> Can you tell me the file and line this corresponds to?
> 
I don't know, but some debugging shows that fc->ops == NULL.

Guenter
Sergey Senozhatsky Sept. 18, 2018, 9:07 a.m. UTC | #6
Hi,

On (09/11/18 16:54), Guenter Roeck wrote:
> On Wed, Sep 12, 2018 at 12:17:35AM +0100, David Howells wrote:
> > Guenter Roeck <linux@roeck-us.net> wrote:
> > 
> > > [    8.507672] RIP: 0010:reconfigure_super+0x47/0x210
> > 
> > Can you tell me the file and line this corresponds to?
> > 
> I don't know, but some debugging shows that fc->ops == NULL.

This NULL dereference crashes linux-next.

Emergency (sysrq remount/reboot):

emergency_remount()
 do_emergency_remount()
  do_emergency_remount_callback()
   reconfigure_super()

At fc->ops dereference:

 981         if (fc->ops->reconfigure) {
		^^^^^^^^^
 982                 retval = fc->ops->reconfigure(fc);
 983                 if (retval == 0) {
 984                         security_sb_reconfigure(fc);


So either the check had better be

	if (fc->ops && fc->ops->reconfigure)

Or we need to set ->ops properly. But I'm not sure that invoking
->init_fs_context() from the emergency-reboot path is going to work
well all the time.

---

 fs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/super.c b/fs/super.c
index efb0567c8389..e2e03c47c817 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1017,6 +1017,7 @@ int reconfigure_super(struct fs_context *fc)
 static void do_emergency_remount_callback(struct super_block *sb)
 {
 	struct fs_context fc = {
+		.ops		= &legacy_fs_context_ops,
 		.purpose	= FS_CONTEXT_FOR_EMERGENCY_RO,
 		.fs_type	= sb->s_type,
 		.root		= sb->s_root,

---

	-ss
Sergey Senozhatsky Sept. 18, 2018, 9:40 a.m. UTC | #7
On (09/18/18 18:07), Sergey Senozhatsky wrote:
> emergency_remount()
>  do_emergency_remount()
>   do_emergency_remount_callback()
>    reconfigure_super()
> 
> At fc->ops dereference:
> 
>  981         if (fc->ops->reconfigure) {
> 		^^^^^^^^^
>  982                 retval = fc->ops->reconfigure(fc);
>  983                 if (retval == 0) {
>  984                         security_sb_reconfigure(fc);
> 
> 
> So the check either better be
> 
> 	if (fc->ops && fc->ops->reconfigure)

I guess I was pretty lucky to have leading zeroes in that fc.

David, do you want to add a macro that would make misuse of
`struct fs_context fc' less likely? There are 3 users right now that don't
use vfs_new_fs_context(), and none of them appears to properly set all of
the `struct fs_context fc' members. This can cause problems in the future,
right?

fs/namespace.c: struct fs_context fc = {
fs/super.c:                     struct fs_context fc = {
fs/super.c:     struct fs_context fc = {

	-ss
Sergey Senozhatsky Sept. 18, 2018, 9:54 a.m. UTC | #8
On (08/01/18 16:25), David Howells wrote:
[..]
> @@ -2460,18 +2428,41 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
>  	if (!can_change_locked_flags(mnt, mnt_flags))
>  		return -EPERM;
>  
> -	err = security_sb_remount(sb, data, data_size);
> -	if (err)
> -		return err;
> +	if (type->init_fs_context) {
> +		fc = vfs_sb_reconfig(path, sb_flags);
> +		if (IS_ERR(fc))
> +			return PTR_ERR(fc);
> +
> +		err = parse_monolithic_mount_data(fc, data, data_size);
> +		if (err < 0)
> +			goto err_fc;
> +
> +		if (fc->ops->validate) {
> +			err = fc->ops->validate(fc);
> +			if (err < 0)
> +				goto err_fc;
> +		}
> +
> +		err = security_fs_context_validate(fc);
> +		if (err)
> +			return err;

		goto err_fc?

> +	} else {
> +		err = security_sb_remount(sb, data, data_size);
> +		if (err)
> +			return err;
> +	}

		goto err_fc?

	-ss
Guenter Roeck Sept. 18, 2018, 2:06 p.m. UTC | #9
On 09/18/2018 02:07 AM, Sergey Senozhatsky wrote:
> Hi,
> 
> On (09/11/18 16:54), Guenter Roeck wrote:
>> On Wed, Sep 12, 2018 at 12:17:35AM +0100, David Howells wrote:
>>> Guenter Roeck <linux@roeck-us.net> wrote:
>>>
>>>> [    8.507672] RIP: 0010:reconfigure_super+0x47/0x210
>>>
>>> Can you tell me the file and line this corresponds to?
>>>
>> I don't know, but some debugging shows that fc->ops == NULL.
> 
> This NULL derefs linux-next.
> 
> Emergency (sysrq remount/reboot):
> 
> emergency_remount()
>   do_emergency_remount()
>    do_emergency_remount_callback()
>     reconfigure_super()
> 
> At fc->ops dereference:
> 
>   981         if (fc->ops->reconfigure) {
> 		^^^^^^^^^
>   982                 retval = fc->ops->reconfigure(fc);
>   983                 if (retval == 0) {
>   984                         security_sb_reconfigure(fc);
> 
> 
> So the check either better be
> 
> 	if (fc->ops && fc->ops->reconfigure)
> 

Since there are multiple instances of fs_context where fc->ops isn't set,
this check would be needed wherever fc->ops is dereferenced.

Guenter

> Or, we need to set ->ops properly. But I'm not sure if invoking
> ->init_fs_context() from emergency-reboot path is going to work
> well all the time.
> 
> ---
> 
>   fs/super.c | 1 +
>   1 file changed, 1 insertion(+)
> 
> diff --git a/fs/super.c b/fs/super.c
> index efb0567c8389..e2e03c47c817 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1017,6 +1017,7 @@ int reconfigure_super(struct fs_context *fc)
>   static void do_emergency_remount_callback(struct super_block *sb)
>   {
>   	struct fs_context fc = {
> +		.ops		= &legacy_fs_context_ops,
>   		.purpose	= FS_CONTEXT_FOR_EMERGENCY_RO,
>   		.fs_type	= sb->s_type,
>   		.root		= sb->s_root,
> 
> ---
> 
> 	-ss
>
David Howells Sept. 18, 2018, 3:28 p.m. UTC | #10
Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com> wrote:

> > +		err = security_fs_context_validate(fc);
> > +		if (err)
> > +			return err;
> 
> 		goto err_fc?

Fixed thanks.

> > +	} else {
> > +		err = security_sb_remount(sb, data, data_size);
> > +		if (err)
> > +			return err;
> > +	}
> 
> 		goto err_fc?

This no longer exists.  I need to repost my patchset.  I was hoping to fix the
mqueue bug there first, but maybe I should just post anyway.

David
David Howells Sept. 18, 2018, 3:34 p.m. UTC | #11
Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com> wrote:

>  static void do_emergency_remount_callback(struct super_block *sb)
>  {
>  	struct fs_context fc = {
> +		.ops		= &legacy_fs_context_ops,
>  		.purpose	= FS_CONTEXT_FOR_EMERGENCY_RO,
>  		.fs_type	= sb->s_type,
>  		.root		= sb->s_root,

Actually, we do need to call ->init_fs_context() or legacy_init_fs_context()
to set the ops pointer.

David
David Howells Sept. 18, 2018, 4:39 p.m. UTC | #12
I think I need to include something like the attached change in this patch.
Changes also need to be made to proc and cgroup.

David
---
commit 1a336480a0f664b3e1988a581bccd4fa10d98764
Author: David Howells <dhowells@redhat.com>
Date:   Tue Sep 18 17:25:23 2018 +0100

    fix remount

diff --git a/fs/fs_context.c b/fs/fs_context.c
index a82679441031..1f36b72e3ee9 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -42,8 +42,6 @@ struct legacy_fs_context {
 	enum legacy_fs_param	param_type;
 };
 
-static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry);
-
 static const struct constant_table common_set_sb_flag[] = {
 	{ "dirsync",	SB_DIRSYNC },
 	{ "lazytime",	SB_LAZYTIME },
@@ -293,7 +291,6 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
 		break;
 	}
 
-
 	/* TODO: Make all filesystems support this unconditionally */
 	init_fs_context = fc->fs_type->init_fs_context;
 	if (!init_fs_context)
@@ -662,12 +659,20 @@ const struct fs_context_operations legacy_fs_context_ops = {
  * Initialise a legacy context for a filesystem that doesn't support
  * fs_context.
  */
-static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry)
+int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry)
 {
+	switch (fc->purpose) {
+	default:
+		fc->fs_private = kzalloc(sizeof(struct legacy_fs_context),
+					 GFP_KERNEL);
+		if (!fc->fs_private)
+			return -ENOMEM;
+		break;
 
-	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
-	if (!fc->fs_private)
-		return -ENOMEM;
+	case FS_CONTEXT_FOR_UMOUNT:
+	case FS_CONTEXT_FOR_EMERGENCY_RO:
+		break;
+	}
 
 	fc->ops = &legacy_fs_context_ops;
 	return 0;
diff --git a/fs/internal.h b/fs/internal.h
index d25cb82af69d..fc2da60abbcd 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,6 +55,7 @@ extern void __init chrdev_init(void);
  * fs_context.c
  */
 extern const struct fs_context_operations legacy_fs_context_ops;
+extern int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry);
 
 /*
  * fsopen.c
diff --git a/fs/namespace.c b/fs/namespace.c
index ddb2c3b88cd6..86b60566fcdf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1429,9 +1429,19 @@ static int do_umount_root(struct super_block *sb)
 	};
 
 	down_write(&sb->s_umount);
-	if (!sb_rdonly(sb))
-		/* Might want to call ->init_fs_context(). */
-		ret = reconfigure_super(&fc);
+	if (!sb_rdonly(sb)) {
+		int ret;
+
+		if (fc.fs_type->init_fs_context)
+			ret = fc.fs_type->init_fs_context(&fc, NULL);
+		else
+			ret = legacy_init_fs_context(&fc, NULL);
+
+		if (ret == 0) {
+			ret = reconfigure_super(&fc);
+			fc.ops->free(&fc);
+		}
+	}
 	up_write(&sb->s_umount);
 	return ret;
 }
@@ -2396,7 +2406,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 
 	err = security_fs_context_validate(fc);
 	if (err)
-		return err;
+		goto err_fc;
 
 	down_write(&sb->s_umount);
 	err = -EPERM;
diff --git a/fs/super.c b/fs/super.c
index e00b03249bfa..df8c4cebd000 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1026,12 +1026,20 @@ static void do_emergency_remount_callback(struct super_block *sb)
 
 	down_write(&sb->s_umount);
 	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
-	    !sb_rdonly(sb))
-		/* Might want to call ->init_fs_context(). */
+	    !sb_rdonly(sb)) {
+		int ret;
+
+		if (fc.fs_type->init_fs_context)
+			ret = fc.fs_type->init_fs_context(&fc, NULL);
+		else
+			ret = legacy_init_fs_context(&fc, NULL);
+
 		/*
 		 * What lock protects sb->s_flags??
 		 */
-		reconfigure_super(&fc);
+		if (ret == 0)
+			reconfigure_super(&fc);
+	}
 	up_write(&sb->s_umount);
 }
David Howells Sept. 18, 2018, 5:43 p.m. UTC | #13
Actually, I can do better and allow ->init_fs_context() to return -EOPNOTSUPP
in the FS_CONTEXT_FOR_UMOUNT and FS_CONTEXT_FOR_EMERGENCY_RO cases if the
filesystem has no interest in them.

David
Sergey Senozhatsky Sept. 19, 2018, 1:12 a.m. UTC | #14
On (09/18/18 07:06), Guenter Roeck wrote:
> > So the check either better be
> > 
> > 	if (fc->ops && fc->ops->reconfigure)
> > 
> 
> Since there are multiple instances of fs_context where fc->ops isn't set,
> this check would be needed wherever fc->ops is dereferenced.

Right, but only if fc is always guaranteed to be properly zeroed out. That
is true for kzalloc-ed fc's, but not necessarily so in any other case.

	-ss
Sergey Senozhatsky Sept. 19, 2018, 1:15 a.m. UTC | #15
On (09/18/18 17:39), David Howells wrote:
[..]
> -static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry)
> +int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry)
>  {
> +	switch (fc->purpose) {
> +	default:
> +		fc->fs_private = kzalloc(sizeof(struct legacy_fs_context),
> +					 GFP_KERNEL);
> +		if (!fc->fs_private)
> +			return -ENOMEM;

Is it guaranteed that ops->reconfigure(), when invoked for
FS_CONTEXT_FOR_UMOUNT or FS_CONTEXT_FOR_EMERGENCY_RO, will never access
fc->fs_private?

> +		break;
>  
> -	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
> -	if (!fc->fs_private)
> -		return -ENOMEM;
> +	case FS_CONTEXT_FOR_UMOUNT:
> +	case FS_CONTEXT_FOR_EMERGENCY_RO:
> +		break;
> +	}

So `fc' can either be zeroed-out, when it comes from fc = kzalloc(),
or contain some garbage otherwise. Would it make sense to zero-out `fc'
regardless of its origin?

>  	down_write(&sb->s_umount);
> -	if (!sb_rdonly(sb))
> -		/* Might want to call ->init_fs_context(). */
> -		ret = reconfigure_super(&fc);
> +	if (!sb_rdonly(sb)) {
> +		int ret;
> +
> +		if (fc.fs_type->init_fs_context)
> +			ret = fc.fs_type->init_fs_context(&fc, NULL);
> +		else
> +			ret = legacy_init_fs_context(&fc, NULL);
> +
> +		if (ret == 0) {
> +			ret = reconfigure_super(&fc);
> +			fc.ops->free(&fc);
			^^^^^^^
Is ops->free() always !NULL?

	-ss
Sergey Senozhatsky Sept. 19, 2018, 1:26 a.m. UTC | #16
On (09/19/18 10:12), Sergey Senozhatsky wrote:
> On (09/18/18 07:06), Guenter Roeck wrote:
> > > So the check either better be
> > > 
> > > 	if (fc->ops && fc->ops->reconfigure)
> > > 
> > 
> > Since there are multiple instances of fs_context where fc->ops isn't set,
> > this check would be needed wherever fc->ops is dereferenced.
> 
> Right. If fc is always guaranteed to be properly zeroed-out. This is
> true for kzalloc-ed fc's, but not necessarily so in any other case.

What I meant was something like this:

	void foo(void)
	{
		struct fs_context fc;

		fc.purpose   = ...;
		fc.fs_type   = ...;
		fc.root      = ...;
		fc.sb_flags  = ...;

		reconfigure_super(&fc);
	}

	-ss
diff mbox series

Patch

diff --git a/fs/Makefile b/fs/Makefile
index 07b894227dce..9a0b8003f069 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,7 +13,7 @@  obj-y :=	open.o read_write.o file_table.o super.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o d_path.o \
 		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
-		fs_parser.o
+		fs_context.o fs_parser.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/filesystems.c b/fs/filesystems.c
index b03f57b1105b..9135646e41ac 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -16,6 +16,7 @@ 
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/fs_parser.h>
 
 /*
  * Handling of filesystem drivers list.
@@ -73,6 +74,9 @@  int register_filesystem(struct file_system_type * fs)
 	int res = 0;
 	struct file_system_type ** p;
 
+	if (fs->parameters && !fs_validate_description(fs->parameters))
+		return -EINVAL;
+
 	BUG_ON(strchr(fs->name, '.'));
 	if (fs->next)
 		return -EBUSY;
diff --git a/fs/fs_context.c b/fs/fs_context.c
new file mode 100644
index 000000000000..8f040a20b320
--- /dev/null
+++ b/fs/fs_context.c
@@ -0,0 +1,668 @@ 
+/* Provide a way to create a superblock configuration context within the kernel
+ * that allows a superblock to be set up prior to mounting.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
+#include <linux/security.h>
+#include <linux/mnt_namespace.h>
+#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
+#include <linux/bsearch.h>
+#include <net/net_namespace.h>
+#include "mount.h"
+#include "internal.h"
+
+enum legacy_fs_param {
+	LEGACY_FS_UNSET_PARAMS,
+	LEGACY_FS_NO_PARAMS,
+	LEGACY_FS_MONOLITHIC_PARAMS,
+	LEGACY_FS_INDIVIDUAL_PARAMS,
+	LEGACY_FS_MAGIC_PARAMS,
+};
+
+struct legacy_fs_context {
+	char			*legacy_data;	/* Data page for legacy filesystems */
+	char			*secdata;
+	size_t			data_size;
+	enum legacy_fs_param	param_type;
+};
+
+static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry);
+
+static const struct constant_table common_set_sb_flag[] = {
+	{ "dirsync",	SB_DIRSYNC },
+	{ "lazytime",	SB_LAZYTIME },
+	{ "mand",	SB_MANDLOCK },
+	{ "posixacl",	SB_POSIXACL },
+	{ "ro",		SB_RDONLY },
+	{ "sync",	SB_SYNCHRONOUS },
+};
+
+static const struct constant_table common_clear_sb_flag[] = {
+	{ "async",	SB_SYNCHRONOUS },
+	{ "nolazytime",	SB_LAZYTIME },
+	{ "nomand",	SB_MANDLOCK },
+	{ "rw",		SB_RDONLY },
+	{ "silent",	SB_SILENT },
+};
+
+static const char *forbidden_sb_flag[] = {
+	"bind",
+	"dev",
+	"exec",
+	"move",
+	"noatime",
+	"nodev",
+	"nodiratime",
+	"noexec",
+	"norelatime",
+	"nostrictatime",
+	"nosuid",
+	"private",
+	"rec",
+	"relatime",
+	"remount",
+	"shared",
+	"slave",
+	"strictatime",
+	"suid",
+	"unbindable",
+};
+
+static int cmp_flag_name(const void *name, const void *entry)
+{
+	const char **e = (const char **)entry;
+	return strcmp(name, *e);
+}
+
+/*
+ * Check for a common mount option that manipulates s_flags.
+ */
+static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
+{
+	unsigned int token;
+
+	if (bsearch(key, forbidden_sb_flag, ARRAY_SIZE(forbidden_sb_flag),
+		    sizeof(forbidden_sb_flag[0]), cmp_flag_name))
+		return -EINVAL;
+
+	token = lookup_constant(common_set_sb_flag, key, 0);
+	if (token) {
+		fc->sb_flags |= token;
+		return 1;
+	}
+
+	token = lookup_constant(common_clear_sb_flag, key, 0);
+	if (token) {
+		fc->sb_flags &= ~token;
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * vfs_parse_fs_param - Add a single parameter to a superblock config
+ * @fc: The filesystem context to modify
+ * @param: The parameter
+ *
+ * A single mount option in string form is applied to the filesystem context
+ * being set up.  Certain standard options (for example "ro") are translated
+ * into flag bits without going to the filesystem.  The active security module
+ * is allowed to observe and poach options.  Any other options are passed over
+ * to the filesystem to parse.
+ *
+ * This may be called multiple times for a context.
+ *
+ * Returns 0 on success and a negative error code on failure.  In the event of
+ * failure, supplementary error information may have been set.
+ */
+int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	int ret;
+
+	if (!param->key) {
+		pr_err("Unnamed parameter\n");
+		return -EINVAL;
+	}
+
+	ret = vfs_parse_sb_flag(fc, param->key);
+	if (ret < 0)
+		goto out;
+	if (ret == 1)
+		return 0;
+
+	ret = security_fs_context_parse_param(fc, param);
+	if (ret != 0) {
+		if (ret == 1)
+			/* Param belongs to the LSM; don't pass to the FS */
+			ret = 0;
+		goto out;
+	}
+
+	ret = -EINVAL;
+	if (fc->ops->parse_param)
+		ret = fc->ops->parse_param(fc, param);
+	else if (strcmp(param->key, "source") == 0)
+		ret = 0; /* Ignore the source spec */
+
+out:
+	return ret;
+}
+EXPORT_SYMBOL(vfs_parse_fs_param);
+
+/**
+ * vfs_parse_fs_string - Convenience function to just parse a string.
+ */
+int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+			const char *value, size_t v_size)
+{
+	int ret;
+
+	struct fs_parameter param = {
+		.key	= key,
+		.type	= fs_value_is_string,
+		.size	= v_size,
+	};
+
+	if (v_size > 0) {
+		param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
+		if (!param.string)
+			return -ENOMEM;
+	}
+
+	ret = vfs_parse_fs_param(fc, &param);
+	kfree(param.string);
+	return ret;
+}
+EXPORT_SYMBOL(vfs_parse_fs_string);
+
+/**
+ * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
+ * @fc: The superblock configuration to fill in.
+ * @data: The data to parse
+ * @data_size: The amount of data
+ *
+ * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
+ * called from the ->parse_monolithic() fs_context operation.
+ *
+ * Returns 0 on success or the error returned by the ->parse_param() fs_context
+ * operation on failure.
+ */
+int generic_parse_monolithic(struct fs_context *fc, void *data, size_t data_size)
+{
+	char *options = data, *key;
+	int ret = 0;
+
+	if (!options)
+		return 0;
+
+	while ((key = strsep(&options, ",")) != NULL) {
+		if (*key) {
+			size_t v_len = 0;
+			char *value = strchr(key, '=');
+
+			if (value) {
+				if (value == key)
+					continue;
+				*value++ = 0;
+				v_len = strlen(value);
+			}
+			ret = vfs_parse_fs_string(fc, key, value, v_len);
+			if (ret < 0)
+				break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(generic_parse_monolithic);
+
+/**
+ * vfs_new_fs_context - Create a filesystem context.
+ * @fs_type: The filesystem type.
+ * @reference: The dentry from which this one derives (or NULL)
+ * @sb_flags: Filesystem/superblock flags (SB_*)
+ * @purpose: The purpose that this configuration shall be used for.
+ *
+ * Open a filesystem and create a mount context.  The mount context is
+ * initialised with the supplied flags and, if a submount/automount from
+ * another superblock (referred to by @reference) is supplied, may have
+ * parameters such as namespaces copied across from that superblock.
+ */
+struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
+				      struct dentry *reference,
+				      unsigned int sb_flags,
+				      enum fs_context_purpose purpose)
+{
+	int (*init_fs_context)(struct fs_context *, struct dentry *);
+	struct fs_context *fc;
+	int ret = -ENOMEM;
+
+	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+	if (!fc)
+		return ERR_PTR(-ENOMEM);
+
+	fc->purpose	= purpose;
+	fc->sb_flags	= sb_flags;
+	fc->fs_type	= get_filesystem(fs_type);
+	fc->cred	= get_current_cred();
+
+	switch (purpose) {
+	case FS_CONTEXT_FOR_KERNEL_MOUNT:
+		fc->sb_flags |= SB_KERNMOUNT;
+		/* Fallthrough */
+	case FS_CONTEXT_FOR_USER_MOUNT:
+		fc->user_ns = get_user_ns(fc->cred->user_ns);
+		fc->net_ns = get_net(current->nsproxy->net_ns);
+		break;
+	case FS_CONTEXT_FOR_SUBMOUNT:
+		fc->user_ns = get_user_ns(reference->d_sb->s_user_ns);
+		fc->net_ns = get_net(current->nsproxy->net_ns);
+		break;
+	case FS_CONTEXT_FOR_RECONFIGURE:
+		/* We don't pin any namespaces as the superblock's
+		 * subscriptions cannot be changed at this point.
+		 */
+		atomic_inc(&reference->d_sb->s_active);
+		fc->root = dget(reference);
+		break;
+	}
+
+
+	/* TODO: Make all filesystems support this unconditionally */
+	init_fs_context = fc->fs_type->init_fs_context;
+	if (!init_fs_context)
+		init_fs_context = legacy_init_fs_context;
+
+	ret = (*init_fs_context)(fc, reference);
+	if (ret < 0)
+		goto err_fc;
+	fc->need_free = true;
+
+	/* Do the security check last because ->init_fs_context may change the
+	 * namespace subscriptions.
+	 */
+	ret = security_fs_context_alloc(fc, reference);
+	if (ret < 0)
+		goto err_fc;
+
+	return fc;
+
+err_fc:
+	put_fs_context(fc);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(vfs_new_fs_context);
+
+/**
+ * vfs_sb_reconfig - Create a filesystem context for remount/reconfiguration
+ * @mountpoint: The mountpoint to open
+ * @sb_flags: Filesystem/superblock flags (SB_*)
+ *
+ * Open a mounted filesystem and create a filesystem context such that a
+ * remount can be effected.
+ */
+struct fs_context *vfs_sb_reconfig(struct path *mountpoint,
+				   unsigned int sb_flags)
+{
+	struct fs_context *fc;
+
+	fc = vfs_new_fs_context(mountpoint->dentry->d_sb->s_type,
+				mountpoint->dentry,
+				sb_flags, FS_CONTEXT_FOR_RECONFIGURE);
+	if (IS_ERR(fc))
+		return fc;
+
+	return fc;
+}
+
+/**
+ * vfs_dup_fs_context - Duplicate a filesystem context.
+ * @src_fc: The context to copy.
+ */
+struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
+{
+	struct fs_context *fc;
+	int ret;
+
+	if (!src_fc->ops->dup)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL);
+	if (!fc)
+		return ERR_PTR(-ENOMEM);
+
+	fc->fs_private	= NULL;
+	fc->s_fs_info	= NULL;
+	fc->source	= NULL;
+	fc->security	= NULL;
+	get_filesystem(fc->fs_type);
+	get_net(fc->net_ns);
+	get_user_ns(fc->user_ns);
+	get_cred(fc->cred);
+
+	/* Can't call put until we've called ->dup */
+	ret = fc->ops->dup(fc, src_fc);
+	if (ret < 0)
+		goto err_fc;
+
+	ret = security_fs_context_dup(fc, src_fc);
+	if (ret < 0)
+		goto err_fc;
+	return fc;
+
+err_fc:
+	put_fs_context(fc);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(vfs_dup_fs_context);
+
+/**
+ * put_fs_context - Dispose of a superblock configuration context.
+ * @fc: The context to dispose of.
+ */
+void put_fs_context(struct fs_context *fc)
+{
+	struct super_block *sb;
+
+	if (fc->root) {
+		sb = fc->root->d_sb;
+		dput(fc->root);
+		fc->root = NULL;
+		deactivate_super(sb);
+	}
+
+	if (fc->need_free && fc->ops && fc->ops->free)
+		fc->ops->free(fc);
+
+	security_fs_context_free(fc);
+	if (fc->net_ns)
+		put_net(fc->net_ns);
+	put_user_ns(fc->user_ns);
+	if (fc->cred)
+		put_cred(fc->cred);
+	kfree(fc->subtype);
+	put_filesystem(fc->fs_type);
+	kfree(fc->source);
+	kfree(fc);
+}
+EXPORT_SYMBOL(put_fs_context);
+
+/*
+ * Free the config for a filesystem that doesn't support fs_context.
+ */
+static void legacy_fs_context_free(struct fs_context *fc)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+
+	if (ctx) {
+		free_secdata(ctx->secdata);
+		switch (ctx->param_type) {
+		case LEGACY_FS_UNSET_PARAMS:
+		case LEGACY_FS_NO_PARAMS:
+			break;
+		case LEGACY_FS_MAGIC_PARAMS:
+			break; /* ctx->legacy_data is the caller's pointer, not a copy */
+		default:
+			kfree(ctx->legacy_data);
+			break;
+		}
+
+		kfree(ctx);
+	}
+}
+
+/*
+ * Duplicate a legacy config.
+ */
+static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+	struct legacy_fs_context *ctx;
+	struct legacy_fs_context *src_ctx = src_fc->fs_private;
+
+	ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	switch (ctx->param_type) {
+	case LEGACY_FS_MONOLITHIC_PARAMS:
+	case LEGACY_FS_INDIVIDUAL_PARAMS:
+		ctx->legacy_data = kmemdup(src_ctx->legacy_data,
+					   src_ctx->data_size, GFP_KERNEL);
+		if (!ctx->legacy_data) {
+			kfree(ctx);
+			return -ENOMEM;
+		}
+		/* Fall through */
+	default:
+		break;
+	}
+
+	fc->fs_private = ctx;
+	return 0;
+}
+
+/*
+ * Add a parameter to a legacy config.  We build up a comma-separated list of
+ * options.
+ */
+static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+	unsigned int size = ctx->data_size;
+	size_t len = 0;
+
+	if (strcmp(param->key, "source") == 0) {
+		if (param->type != fs_value_is_string)
+			return invalf(fc, "VFS: Legacy: Non-string source");
+		if (fc->source)
+			return invalf(fc, "VFS: Legacy: Multiple sources");
+		fc->source = param->string;
+		param->string = NULL;
+		return 0;
+	}
+
+	if (ctx->param_type != LEGACY_FS_UNSET_PARAMS &&
+	    ctx->param_type != LEGACY_FS_INDIVIDUAL_PARAMS)
+		return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
+
+	switch (param->type) {
+	case fs_value_is_string:
+		len = 1 + param->size;
+		/* Fall through */
+	case fs_value_is_flag:
+		len += strlen(param->key);
+		break;
+	default:
+		return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported",
+			      param->key);
+	}
+
+	if (len > PAGE_SIZE - 2 - size)
+		return invalf(fc, "VFS: Legacy: Cumulative options too large");
+	if (strchr(param->key, ',') ||
+	    (param->type == fs_value_is_string &&
+	     memchr(param->string, ',', param->size)))
+		return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
+			      param->key);
+	if (!ctx->legacy_data) {
+		ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!ctx->legacy_data)
+			return -ENOMEM;
+	}
+
+	ctx->legacy_data[size++] = ',';
+	len = strlen(param->key);
+	memcpy(ctx->legacy_data + size, param->key, len);
+	size += len;
+	if (param->type == fs_value_is_string) {
+		ctx->legacy_data[size++] = '=';
+		memcpy(ctx->legacy_data + size, param->string, param->size);
+		size += param->size; /* advance by the value length, not the key length */
+	}
+	ctx->legacy_data[size] = '\0';
+	ctx->data_size = size;
+	ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
+	return 0;
+}
+
+/*
+ * Add monolithic mount data.
+ */
+static int legacy_parse_monolithic(struct fs_context *fc, void *data, size_t data_size)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+
+	if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
+		pr_warn("VFS: Can't mix monolithic and individual options\n");
+		return -EINVAL;
+	}
+
+	if (!data) {
+		ctx->param_type = LEGACY_FS_NO_PARAMS;
+		return 0;
+	}
+
+	ctx->data_size = data_size;
+	if (data_size > 0) {
+		ctx->legacy_data = kmemdup(data, data_size, GFP_KERNEL);
+		if (!ctx->legacy_data)
+			return -ENOMEM;
+		ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS;
+	} else {
+		/* Some filesystems pass weird pointers through that we don't
+		 * want to copy.  They can indicate this by setting data_size
+		 * to 0.
+		 */
+		ctx->legacy_data = data;
+		ctx->param_type = LEGACY_FS_MAGIC_PARAMS;
+	}
+
+	return 0;
+}
+
+/*
+ * Use the legacy mount validation step to strip out and process security
+ * config options.
+ */
+static int legacy_validate(struct fs_context *fc)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+
+	switch (ctx->param_type) {
+	case LEGACY_FS_UNSET_PARAMS:
+		ctx->param_type = LEGACY_FS_NO_PARAMS;
+		/* Fall through */
+	case LEGACY_FS_NO_PARAMS:
+	case LEGACY_FS_MAGIC_PARAMS:
+		return 0;
+	default:
+		break;
+	}
+
+	if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
+		return 0;
+
+	ctx->secdata = alloc_secdata();
+	if (!ctx->secdata)
+		return -ENOMEM;
+
+	return security_sb_copy_data(ctx->legacy_data, ctx->data_size,
+				     ctx->secdata);
+}
+
+/*
+ * Determine the superblock subtype.
+ */
+static int legacy_set_subtype(struct fs_context *fc)
+{
+	const char *subtype = strchr(fc->fs_type->name, '.');
+
+	if (subtype) {
+		subtype++;
+		if (!subtype[0])
+			return -EINVAL;
+	} else {
+		subtype = "";
+	}
+
+	fc->subtype = kstrdup(subtype, GFP_KERNEL);
+	if (!fc->subtype)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Get a mountable root with the legacy mount command.
+ */
+static int legacy_get_tree(struct fs_context *fc)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+	struct super_block *sb;
+	struct dentry *root;
+	int ret;
+
+	root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
+				      fc->source, ctx->legacy_data,
+				      ctx->data_size);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	sb = root->d_sb;
+	BUG_ON(!sb);
+
+	if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
+	    !fc->subtype) {
+		ret = legacy_set_subtype(fc);
+		if (ret < 0)
+			goto err_sb;
+	}
+
+	fc->root = root;
+	return 0;
+
+err_sb:
+	dput(root);
+	deactivate_locked_super(sb);
+	return ret;
+}
+
+const struct fs_context_operations legacy_fs_context_ops = {
+	.free			= legacy_fs_context_free,
+	.dup			= legacy_fs_context_dup,
+	.parse_param		= legacy_parse_param,
+	.parse_monolithic	= legacy_parse_monolithic,
+	.validate		= legacy_validate,
+	.get_tree		= legacy_get_tree,
+};
+
+/*
+ * Initialise a legacy context for a filesystem that doesn't support
+ * fs_context.
+ */
+static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry)
+{
+
+	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+	if (!fc->fs_private)
+		return -ENOMEM;
+
+	fc->ops = &legacy_fs_context_ops;
+	return 0;
+}
diff --git a/fs/internal.h b/fs/internal.h
index f11b834ff1e6..546302e98a04 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -49,6 +49,11 @@  extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
  */
 extern void __init chrdev_init(void);
 
+/*
+ * fs_context.c
+ */
+extern const struct fs_context_operations legacy_fs_context_ops;
+
 /*
  * namei.c
  */
@@ -101,7 +106,8 @@  extern struct file *get_empty_filp(void);
 /*
  * super.c
  */
-extern int do_remount_sb(struct super_block *, int, void *, size_t, int);
+extern int do_remount_sb(struct super_block *, int, void *, size_t, int,
+			 struct fs_context *);
 extern bool trylock_super(struct super_block *sb);
 extern struct dentry *mount_fs(struct file_system_type *,
 			       int, const char *, void *, size_t);
diff --git a/fs/libfs.c b/fs/libfs.c
index 9f1f4884b7cc..d9a5d883dc3f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -9,6 +9,7 @@ 
 #include <linux/slab.h>
 #include <linux/cred.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 #include <linux/vfs.h>
 #include <linux/quotaops.h>
 #include <linux/mutex.h>
@@ -574,13 +575,29 @@  static DEFINE_SPINLOCK(pin_fs_lock);
 
 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
 {
+	struct fs_context *fc;
 	struct vfsmount *mnt = NULL;
+	int ret;
+
 	spin_lock(&pin_fs_lock);
 	if (unlikely(!*mount)) {
 		spin_unlock(&pin_fs_lock);
-		mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL, 0);
+
+		fc = vfs_new_fs_context(type, NULL, 0, FS_CONTEXT_FOR_KERNEL_MOUNT);
+		if (IS_ERR(fc))
+			return PTR_ERR(fc);
+
+		ret = vfs_get_tree(fc);
+		if (ret < 0) {
+			put_fs_context(fc);
+			return ret;
+		}
+
+		mnt = vfs_create_mount(fc, 0);
+		put_fs_context(fc);
 		if (IS_ERR(mnt))
 			return PTR_ERR(mnt);
+
 		spin_lock(&pin_fs_lock);
 		if (!*mount)
 			*mount = mnt;
diff --git a/fs/namespace.c b/fs/namespace.c
index 859dc473e2ad..51a6799c3f61 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -26,8 +26,10 @@ 
 #include <linux/magic.h>
 #include <linux/bootmem.h>
 #include <linux/task_work.h>
+#include <linux/file.h>
 #include <linux/sched/task.h>
 #include <uapi/linux/mount.h>
+#include <linux/fs_context.h>
 
 #include "pnode.h"
 #include "internal.h"
@@ -1017,56 +1019,6 @@  static struct mount *skip_mnt_tree(struct mount *p)
 	return p;
 }
 
-struct vfsmount *
-vfs_kern_mount(struct file_system_type *type, int flags, const char *name,
-	       void *data, size_t data_size)
-{
-	struct mount *mnt;
-	struct dentry *root;
-
-	if (!type)
-		return ERR_PTR(-ENODEV);
-
-	mnt = alloc_vfsmnt(name);
-	if (!mnt)
-		return ERR_PTR(-ENOMEM);
-
-	if (flags & SB_KERNMOUNT)
-		mnt->mnt.mnt_flags = MNT_INTERNAL;
-
-	root = mount_fs(type, flags, name, data, data_size);
-	if (IS_ERR(root)) {
-		mnt_free_id(mnt);
-		free_vfsmnt(mnt);
-		return ERR_CAST(root);
-	}
-
-	mnt->mnt.mnt_root = root;
-	mnt->mnt.mnt_sb = root->d_sb;
-	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-	mnt->mnt_parent = mnt;
-	lock_mount_hash();
-	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
-	unlock_mount_hash();
-	return &mnt->mnt;
-}
-EXPORT_SYMBOL_GPL(vfs_kern_mount);
-
-struct vfsmount *
-vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
-	     const char *name, void *data, size_t data_size)
-{
-	/* Until it is worked out how to pass the user namespace
-	 * through from the parent mount to the submount don't support
-	 * unprivileged mounts with submounts.
-	 */
-	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
-		return ERR_PTR(-EPERM);
-
-	return vfs_kern_mount(type, SB_SUBMOUNT, name, data, data_size);
-}
-EXPORT_SYMBOL_GPL(vfs_submount);
-
 static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 					int flag)
 {
@@ -1594,7 +1546,7 @@  static int do_umount(struct mount *mnt, int flags)
 			return -EPERM;
 		down_write(&sb->s_umount);
 		if (!sb_rdonly(sb))
-			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0, 0);
+			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0, 0, NULL);
 		up_write(&sb->s_umount);
 		return retval;
 	}
@@ -2439,6 +2391,20 @@  static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
 	return ret;
 }
 
+/*
+ * Parse the monolithic page of mount data given to sys_mount().
+ */
+static int parse_monolithic_mount_data(struct fs_context *fc, void *data, size_t data_size)
+{
+	int (*monolithic_mount_data)(struct fs_context *, void *, size_t);
+
+	monolithic_mount_data = fc->ops->parse_monolithic;
+	if (!monolithic_mount_data)
+		monolithic_mount_data = generic_parse_monolithic;
+
+	return monolithic_mount_data(fc, data, data_size);
+}
+
 /*
  * change filesystem flags. dir should be a physical root of filesystem.
  * If you've mounted a non-root directory somewhere and want to do remount
@@ -2447,9 +2413,11 @@  static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
 static int do_remount(struct path *path, int ms_flags, int sb_flags,
 		      int mnt_flags, void *data, size_t data_size)
 {
+	struct fs_context *fc = NULL;
 	int err;
 	struct super_block *sb = path->mnt->mnt_sb;
 	struct mount *mnt = real_mount(path->mnt);
+	struct file_system_type *type = sb->s_type;
 
 	if (!check_mnt(mnt))
 		return -EINVAL;
@@ -2460,18 +2428,41 @@  static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	if (!can_change_locked_flags(mnt, mnt_flags))
 		return -EPERM;
 
-	err = security_sb_remount(sb, data, data_size);
-	if (err)
-		return err;
+	if (type->init_fs_context) {
+		fc = vfs_sb_reconfig(path, sb_flags);
+		if (IS_ERR(fc))
+			return PTR_ERR(fc);
+
+		err = parse_monolithic_mount_data(fc, data, data_size);
+		if (err < 0)
+			goto err_fc;
+
+		if (fc->ops->validate) {
+			err = fc->ops->validate(fc);
+			if (err < 0)
+				goto err_fc;
+		}
+
+		err = security_fs_context_validate(fc);
+		if (err)
+			goto err_fc; /* fc pins the sb and root dentry; must be put */
+	} else {
+		err = security_sb_remount(sb, data, data_size);
+		if (err)
+			return err;
+	}
 
 	down_write(&sb->s_umount);
 	err = -EPERM;
 	if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
-		err = do_remount_sb(sb, sb_flags, data, data_size, 0);
+		err = do_remount_sb(sb, sb_flags, data, data_size, 0, fc);
 		if (!err)
 			set_mount_attributes(mnt, mnt_flags);
 	}
 	up_write(&sb->s_umount);
+err_fc:
+	if (fc)
+		put_fs_context(fc);
 	return err;
 }
 
@@ -2576,29 +2567,6 @@  static int do_move_mount_old(struct path *path, const char *old_name)
 	return err;
 }
 
-static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
-{
-	int err;
-	const char *subtype = strchr(fstype, '.');
-	if (subtype) {
-		subtype++;
-		err = -EINVAL;
-		if (!subtype[0])
-			goto err;
-	} else
-		subtype = "";
-
-	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
-	err = -ENOMEM;
-	if (!mnt->mnt_sb->s_subtype)
-		goto err;
-	return mnt;
-
- err:
-	mntput(mnt);
-	return ERR_PTR(err);
-}
-
 /*
  * add a mount into a namespace's mount tree
  */
@@ -2643,44 +2611,88 @@  static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 	return err;
 }
 
-static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
+static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
+
+/*
+ * Create a new mount using a superblock configuration and request it
+ * be added to the namespace tree.
+ */
+static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
+			   unsigned int mnt_flags)
+{
+	struct vfsmount *mnt;
+	int ret;
+
+	ret = security_sb_mountpoint(fc, mountpoint,
+				     mnt_flags & ~MNT_INTERNAL_FLAGS);
+	if (ret < 0)
+		return ret;
+
+	if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
+		pr_warn("VFS: Mount too revealing\n");
+		return -EPERM;
+	}
+
+	mnt = vfs_create_mount(fc, mnt_flags);
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
+
+	ret = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
+	if (ret < 0)
+		goto err_mnt;
+	return ret;
+
+err_mnt:
+	mntput(mnt);
+	return ret;
+}
 
 /*
  * create a new mount for userspace and request it to be added into the
  * namespace's tree
  */
-static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
-			int mnt_flags, const char *name,
+static int do_new_mount(struct path *mountpoint, const char *fstype,
+			int sb_flags, int mnt_flags, const char *name,
 			void *data, size_t data_size)
 {
-	struct file_system_type *type;
-	struct vfsmount *mnt;
+	struct file_system_type *fs_type;
+	struct fs_context *fc;
 	int err;
 
 	if (!fstype)
 		return -EINVAL;
 
-	type = get_fs_type(fstype);
-	if (!type)
-		return -ENODEV;
-
-	mnt = vfs_kern_mount(type, sb_flags, name, data, data_size);
-	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
-	    !mnt->mnt_sb->s_subtype)
-		mnt = fs_set_subtype(mnt, fstype);
+	err = -ENODEV;
+	fs_type = get_fs_type(fstype);
+	if (!fs_type)
+		goto out;
 
-	put_filesystem(type);
-	if (IS_ERR(mnt))
-		return PTR_ERR(mnt);
+	fc = vfs_new_fs_context(fs_type, NULL, sb_flags,
+				FS_CONTEXT_FOR_USER_MOUNT);
+	put_filesystem(fs_type);
+	if (IS_ERR(fc)) {
+		err = PTR_ERR(fc);
+		goto out;
+	}
 
-	if (mount_too_revealing(mnt, &mnt_flags)) {
-		mntput(mnt);
-		return -EPERM;
+	if (name) {
+		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
+		if (err < 0)
+			goto out_fc;
 	}
 
-	err = do_add_mount(real_mount(mnt), path, mnt_flags);
-	if (err)
-		mntput(mnt);
+	err = parse_monolithic_mount_data(fc, data, data_size);
+	if (err < 0)
+		goto out_fc;
+
+	err = vfs_get_tree(fc);
+	if (err < 0)
+		goto out_fc;
+
+	err = do_new_mount_fc(fc, mountpoint, mnt_flags);
+out_fc:
+	put_fs_context(fc);
+out:
 	return err;
 }
 
@@ -3230,6 +3242,118 @@  SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
 	return ksys_mount(dev_name, dir_name, type, flags, data);
 }
 
+/**
+ * vfs_create_mount - Create a mount for a configured superblock
+ * @fc: The configuration context with the superblock attached
+ * @mnt_flags: The mount flags to apply
+ *
+ * Create a mount to an already configured superblock.  If necessary, the
+ * caller should invoke vfs_get_tree() before calling this.
+ *
+ * Note that this does not attach the mount to anything.
+ */
+struct vfsmount *vfs_create_mount(struct fs_context *fc, unsigned int mnt_flags)
+{
+	struct mount *mnt;
+
+	if (!fc->root)
+		return ERR_PTR(-EINVAL);
+
+	mnt = alloc_vfsmnt(fc->source ?: "none");
+	if (!mnt)
+		return ERR_PTR(-ENOMEM);
+
+	if (fc->purpose == FS_CONTEXT_FOR_KERNEL_MOUNT)
+		/* It's a longterm mount, don't release mnt until we unmount
+		 * before file sys is unregistered
+		 */
+		mnt_flags |= MNT_INTERNAL;
+
+	atomic_inc(&fc->root->d_sb->s_active);
+	mnt->mnt.mnt_flags	= mnt_flags;
+	mnt->mnt.mnt_sb		= fc->root->d_sb;
+	mnt->mnt.mnt_root	= dget(fc->root);
+	mnt->mnt_mountpoint	= mnt->mnt.mnt_root;
+	mnt->mnt_parent		= mnt;
+
+	lock_mount_hash();
+	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
+	unlock_mount_hash();
+	return &mnt->mnt;
+}
+EXPORT_SYMBOL(vfs_create_mount);
+
+struct vfsmount *vfs_kern_mount(struct file_system_type *type,
+				int sb_flags, const char *devname,
+				void *data, size_t data_size)
+{
+	struct fs_context *fc;
+	struct vfsmount *mnt;
+	int ret;
+
+	if (!type)
+		return ERR_PTR(-EINVAL);
+
+	fc = vfs_new_fs_context(type, NULL, sb_flags,
+				sb_flags & SB_KERNMOUNT ?
+				FS_CONTEXT_FOR_KERNEL_MOUNT :
+				FS_CONTEXT_FOR_USER_MOUNT);
+	if (IS_ERR(fc))
+		return ERR_CAST(fc);
+
+	if (devname) {
+		ret = vfs_parse_fs_string(fc, "source",
+					  devname, strlen(devname));
+		if (ret < 0)
+			goto err_fc;
+	}
+
+	ret = parse_monolithic_mount_data(fc, data, data_size);
+	if (ret < 0)
+		goto err_fc;
+
+	ret = vfs_get_tree(fc);
+	if (ret < 0)
+		goto err_fc;
+
+	mnt = vfs_create_mount(fc, 0);
+out:
+	put_fs_context(fc);
+	return mnt;
+err_fc:
+	mnt = ERR_PTR(ret);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(vfs_kern_mount);
+
+struct vfsmount *
+vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
+	     const char *name, void *data, size_t data_size)
+{
+	/* Until it is worked out how to pass the user namespace
+	 * through from the parent mount to the submount don't support
+	 * unprivileged mounts with submounts.
+	 */
+	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
+		return ERR_PTR(-EPERM);
+
+	return vfs_kern_mount(type, SB_SUBMOUNT, name, data, data_size);
+}
+EXPORT_SYMBOL_GPL(vfs_submount);
+
+struct vfsmount *kern_mount(struct file_system_type *type)
+{
+	return vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kern_mount);
+
+struct vfsmount *kern_mount_data(struct file_system_type *type,
+				 void *data, size_t data_size)
+{
+	return vfs_kern_mount(type, SB_KERNMOUNT, type->name, data, data_size);
+}
+EXPORT_SYMBOL_GPL(kern_mount_data);
+
 /*
  * Move a mount from one place to another.
  * In combination with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be
@@ -3507,22 +3631,6 @@  void put_mnt_ns(struct mnt_namespace *ns)
 	free_mnt_ns(ns);
 }
 
-struct vfsmount *kern_mount_data(struct file_system_type *type,
-				 void *data, size_t data_size)
-{
-	struct vfsmount *mnt;
-	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data, data_size);
-	if (!IS_ERR(mnt)) {
-		/*
-		 * it is a longterm mount, don't release mnt until
-		 * we unmount before file sys is unregistered
-		*/
-		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
-	}
-	return mnt;
-}
-EXPORT_SYMBOL_GPL(kern_mount_data);
-
 void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
@@ -3563,7 +3671,8 @@  bool current_chrooted(void)
 	return chrooted;
 }
 
-static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
+static bool mnt_already_visible(struct mnt_namespace *ns,
+				const struct super_block *sb,
 				int *new_mnt_flags)
 {
 	int new_flags = *new_mnt_flags;
@@ -3575,7 +3684,7 @@  static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
 		struct mount *child;
 		int mnt_flags;
 
-		if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
+		if (mnt->mnt.mnt_sb->s_type != sb->s_type)
 			continue;
 
 		/* This mount is not fully visible if it's root directory
@@ -3626,7 +3735,7 @@  static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
 	return visible;
 }
 
-static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
+static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
 {
 	const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
@@ -3636,7 +3745,7 @@  static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
 		return false;
 
 	/* Can this filesystem be too revealing? */
-	s_iflags = mnt->mnt_sb->s_iflags;
+	s_iflags = sb->s_iflags;
 	if (!(s_iflags & SB_I_USERNS_VISIBLE))
 		return false;
 
@@ -3646,7 +3755,7 @@  static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
 		return true;
 	}
 
-	return !mnt_already_visible(ns, mnt, new_mnt_flags);
+	return !mnt_already_visible(ns, sb, new_mnt_flags);
 }
 
 bool mnt_may_suid(struct vfsmount *mnt)
diff --git a/fs/super.c b/fs/super.c
index c9d208b7999e..7c5541453081 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -36,6 +36,7 @@ 
 #include <linux/lockdep.h>
 #include <linux/user_namespace.h>
 #include <uapi/linux/mount.h>
+#include <linux/fs_context.h>
 #include "internal.h"
 
 static int thaw_super_locked(struct super_block *sb);
@@ -184,16 +185,13 @@  static void destroy_unused_super(struct super_block *s)
 }
 
 /**
- *	alloc_super	-	create new superblock
- *	@type:	filesystem type superblock should belong to
- *	@flags: the mount flags
- *	@user_ns: User namespace for the super_block
+ *	alloc_super - Create new superblock
+ *	@fc: The filesystem configuration context
  *
  *	Allocates and initializes a new &struct super_block.  alloc_super()
  *	returns a pointer new superblock or %NULL if allocation had failed.
  */
-static struct super_block *alloc_super(struct file_system_type *type, int flags,
-				       struct user_namespace *user_ns)
+static struct super_block *alloc_super(struct fs_context *fc)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static const struct super_operations default_op;
@@ -203,9 +201,9 @@  static struct super_block *alloc_super(struct file_system_type *type, int flags,
 		return NULL;
 
 	INIT_LIST_HEAD(&s->s_mounts);
-	s->s_user_ns = get_user_ns(user_ns);
+	s->s_user_ns = get_user_ns(fc->user_ns);
 	init_rwsem(&s->s_umount);
-	lockdep_set_class(&s->s_umount, &type->s_umount_key);
+	lockdep_set_class(&s->s_umount, &fc->fs_type->s_umount_key);
 	/*
 	 * sget() can have s_umount recursion.
 	 *
@@ -229,12 +227,12 @@  static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	for (i = 0; i < SB_FREEZE_LEVELS; i++) {
 		if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
 					sb_writers_name[i],
-					&type->s_writers_key[i]))
+					&fc->fs_type->s_writers_key[i]))
 			goto fail;
 	}
 	init_waitqueue_head(&s->s_writers.wait_unfrozen);
 	s->s_bdi = &noop_backing_dev_info;
-	s->s_flags = flags;
+	s->s_flags = fc->sb_flags;
 	if (s->s_user_ns != &init_user_ns)
 		s->s_iflags |= SB_I_NODEV;
 	INIT_HLIST_NODE(&s->s_instances);
@@ -252,7 +250,7 @@  static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	s->s_count = 1;
 	atomic_set(&s->s_active, 1);
 	mutex_init(&s->s_vfs_rename_mutex);
-	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
+	lockdep_set_class(&s->s_vfs_rename_mutex, &fc->fs_type->s_vfs_rename_key);
 	init_rwsem(&s->s_dquot.dqio_sem);
 	s->s_maxbytes = MAX_NON_LFS;
 	s->s_op = &default_op;
@@ -472,6 +470,89 @@  void generic_shutdown_super(struct super_block *sb)
 
 EXPORT_SYMBOL(generic_shutdown_super);
 
+/**
+ * sget_fc - Find or create a superblock
+ * @fc:	Filesystem context.
+ * @test: Comparison callback.  Called with sb_lock held for each superblock
+ *	already on @fc->fs_type's list; a non-zero return means a match.  May
+ *	be NULL, in which case no search is made.
+ * @set: Setup callback.  Called with sb_lock held on a newly allocated
+ *	superblock; a non-zero return abandons that superblock and is passed
+ *	back to the caller as the error.
+ *
+ * Find or create a superblock using the parameters stored in the filesystem
+ * context and the two callback functions.
+ *
+ * If an extant superblock is matched, then that will be returned with an
+ * elevated reference count that the caller must transfer or discard.
+ *
+ * If no match is made, a new superblock will be allocated and basic
+ * initialisation will be performed (s_type, s_fs_info and s_id will be set and
+ * the set() callback will be invoked), the superblock will be published and it
+ * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
+ * as yet unset.
+ *
+ * When a new superblock is set up successfully, @fc->s_fs_info is moved into
+ * it and cleared in the context; in every other case the context keeps it.
+ *
+ * Return: the matched or newly created superblock, or an ERR_PTR(): -EPERM if
+ * the permission check fails, -EBUSY if the matching superblock belongs to a
+ * different user namespace, -ENOMEM if allocation fails, or the error
+ * returned by @set.
+ */
+struct super_block *sget_fc(struct fs_context *fc,
+			    int (*test)(struct super_block *, struct fs_context *),
+			    int (*set)(struct super_block *, struct fs_context *))
+{
+	struct super_block *s = NULL;
+	struct super_block *old;
+	int err;
+
+	if (!(fc->sb_flags & SB_KERNMOUNT) &&
+	    fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) {
+		/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+		 * over the namespace.
+		 */
+		if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT) &&
+		    !capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EPERM);
+		else if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN))
+			return ERR_PTR(-EPERM);
+	}
+
+retry:
+	spin_lock(&sb_lock);
+	if (test) {
+		hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
+			if (!test(old, fc))
+				continue;
+			/* A match created for a different user namespace is
+			 * refused rather than shared.
+			 */
+			if (fc->user_ns != old->s_user_ns) {
+				spin_unlock(&sb_lock);
+				destroy_unused_super(s);
+				return ERR_PTR(-EBUSY);
+			}
+			/* NOTE(review): neither outcome below unlocks sb_lock,
+			 * so grab_super() presumably drops it itself - confirm.
+			 */
+			if (!grab_super(old))
+				goto retry;
+			/* Discard any superblock allocated on an earlier pass. */
+			destroy_unused_super(s);
+			return old;
+		}
+	}
+	/* No match.  On the first pass nothing has been allocated yet, so
+	 * drop the lock to allocate and then go round again, which repeats
+	 * the search before the new superblock is used.
+	 */
+	if (!s) {
+		spin_unlock(&sb_lock);
+		s = alloc_super(fc);
+		if (!s)
+			return ERR_PTR(-ENOMEM);
+		goto retry;
+	}
+
+	/* Make the proposed s_fs_info visible to set() through the new
+	 * superblock.
+	 */
+	s->s_fs_info = fc->s_fs_info;
+	err = set(s, fc);
+	if (err) {
+		/* Detach s_fs_info again; the context still holds it. */
+		s->s_fs_info = NULL;
+		spin_unlock(&sb_lock);
+		destroy_unused_super(s);
+		return ERR_PTR(err);
+	}
+	/* From here on s_fs_info is reachable through the superblock only. */
+	fc->s_fs_info = NULL;
+	s->s_type = fc->fs_type;
+	strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+	/* Publish on the global list and on the filesystem type's list. */
+	list_add_tail(&s->s_list, &super_blocks);
+	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
+	spin_unlock(&sb_lock);
+	get_filesystem(s->s_type);
+	register_shrinker_prepared(&s->s_shrink);
+	return s;
+}
+EXPORT_SYMBOL(sget_fc);
+
 /**
  *	sget_userns -	find or create a superblock
  *	@type:	filesystem type superblock should belong to
@@ -514,7 +595,14 @@  struct super_block *sget_userns(struct file_system_type *type,
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
-		s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
+		{
+			struct fs_context fc = {
+				.fs_type	= type,
+				.sb_flags	= flags & ~SB_SUBMOUNT,
+				.user_ns	= user_ns,
+			};
+			s = alloc_super(&fc);
+		}
 		if (!s)
 			return ERR_PTR(-ENOMEM);
 		goto retry;
@@ -838,11 +926,13 @@  struct super_block *user_get_super(dev_t dev)
  *	@data:	the rest of options
  *	@data_size: The size of the data
  *      @force: whether or not to force the change
+ *	@fc:	the superblock config for filesystems that support it
+ *		(NULL if called from emergency or umount)
  *
  *	Alters the mount options of a mounted file system.
  */
 int do_remount_sb(struct super_block *sb, int sb_flags, void *data,
-		  size_t data_size, int force)
+		  size_t data_size, int force, struct fs_context *fc)
 {
 	int retval;
 	int remount_ro;
@@ -884,8 +974,17 @@  int do_remount_sb(struct super_block *sb, int sb_flags, void *data,
 		}
 	}
 
-	if (sb->s_op->remount_fs) {
-		retval = sb->s_op->remount_fs(sb, &sb_flags, data, data_size);
+	if (sb->s_op->reconfigure ||
+	    sb->s_op->remount_fs) {
+		if (sb->s_op->reconfigure) {
+			retval = sb->s_op->reconfigure(sb, fc);
+			sb_flags = fc->sb_flags;
+			if (retval == 0)
+				security_sb_reconfigure(fc);
+		} else {
+			retval = sb->s_op->remount_fs(sb, &sb_flags,
+						      data, data_size);
+		}
 		if (retval) {
 			if (!force)
 				goto cancel_readonly;
@@ -924,7 +1023,7 @@  static void do_emergency_remount_callback(struct super_block *sb)
 		/*
 		 * What lock protects sb->s_flags??
 		 */
-		do_remount_sb(sb, SB_RDONLY, NULL, 0, 1);
+		do_remount_sb(sb, SB_RDONLY, NULL, 0, 1, NULL);
 	}
 	up_write(&sb->s_umount);
 }
@@ -1106,6 +1205,89 @@  struct dentry *mount_ns(struct file_system_type *fs_type,
 
 EXPORT_SYMBOL(mount_ns);
 
+/*
+ * Adapter giving set_anon_super() the signature of an sget_fc() set()
+ * callback: the context is ignored and NULL is passed as the data argument.
+ */
+int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
+{
+	return set_anon_super(sb, NULL);
+}
+EXPORT_SYMBOL(set_anon_super_fc);
+
+/*
+ * sget_fc() test callback used for vfs_get_keyed_super: a superblock matches
+ * when its s_fs_info pointer is the same as the one proposed in the context.
+ */
+static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
+{
+	return sb->s_fs_info == fc->s_fs_info;
+}
+
+/*
+ * sget_fc() test callback used for vfs_get_single_super: unconditionally
+ * reports a match, so the first superblock examined is the one reused.
+ */
+static int test_single_super(struct super_block *s, struct fs_context *fc)
+{
+	return 1;
+}
+
+/**
+ * vfs_get_super - Find or create a superblock, optionally keyed on s_fs_info
+ * @fc: Filesystem context supplying the parameters
+ * @keying: Policy for matching against existing superblocks
+ * @fill_super: Callback used to initialise a superblock that has no root yet
+ *
+ * Obtain a superblock through sget_fc() using a match function selected by
+ * @keying:
+ *
+ * (1) vfs_get_single_super - Any existing superblock of this type matches, so
+ *     at most one can exist.  Typically used for special system filesystems.
+ *
+ * (2) vfs_get_keyed_super - An existing superblock matches if its s_fs_info
+ *     equals that in @fc, so there is one superblock per key.
+ *
+ * (3) vfs_get_independent_super - No matching is done; every call produces a
+ *     new superblock.
+ *
+ * If the superblock obtained has no root, @fill_super() is called on it and,
+ * on success, SB_ACTIVE is set.  A reference to the root dentry is then
+ * stored in @fc->root.
+ *
+ * sget_fc() performs a permissions check unless the superblock is for a
+ * kernel-internal mount or a submount.
+ *
+ * Return: 0 on success, or the error from sget_fc() or @fill_super().
+ */
+int vfs_get_super(struct fs_context *fc,
+		  enum vfs_get_super_keying keying,
+		  int (*fill_super)(struct super_block *sb,
+				    struct fs_context *fc))
+{
+	/* NULL means "do not search"; that is the independent case. */
+	int (*match)(struct super_block *, struct fs_context *) = NULL;
+	struct super_block *sb;
+	int err;
+
+	switch (keying) {
+	case vfs_get_independent_super:
+		break;
+	case vfs_get_keyed_super:
+		match = test_keyed_super;
+		break;
+	case vfs_get_single_super:
+		match = test_single_super;
+		break;
+	default:
+		BUG();
+	}
+
+	sb = sget_fc(fc, match, set_anon_super_fc);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	/* A superblock that already has a root needs no filling in. */
+	if (sb->s_root)
+		goto have_root;
+
+	err = fill_super(sb, fc);
+	if (err) {
+		deactivate_locked_super(sb);
+		return err;
+	}
+	sb->s_flags |= SB_ACTIVE;
+
+have_root:
+	BUG_ON(fc->root);
+	fc->root = dget(sb->s_root);
+	return 0;
+}
+EXPORT_SYMBOL(vfs_get_super);
+
 #ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
@@ -1254,7 +1436,7 @@  struct dentry *mount_single(struct file_system_type *fs_type,
 		}
 		s->s_flags |= SB_ACTIVE;
 	} else {
-		do_remount_sb(s, flags, data, data_size, 0);
+		do_remount_sb(s, flags, data, data_size, 0, NULL);
 	}
 	return dget(s->s_root);
 }
@@ -1601,3 +1783,90 @@  int thaw_super(struct super_block *sb)
 	return thaw_super_locked(sb);
 }
 EXPORT_SYMBOL(thaw_super);
+
+/**
+ * vfs_get_tree - Get the mountable root
+ * @fc: The superblock configuration context.
+ *
+ * The filesystem is invoked to get or create a superblock which can then later
+ * be used for mounting.  The filesystem places a pointer to the root to be
+ * used for mounting in @fc->root.
+ *
+ * Return: 0 on success, with the root in @fc->root, SB_BORN set on the
+ * superblock and its s_umount released; otherwise a negative error: -ENOENT
+ * if the filesystem type requires a device and no source is set, -EBUSY if
+ * @fc->root is already set, -ENOMEM if the subtype cannot be copied, or
+ * whatever ->validate(), ->get_tree() or the security hooks returned.
+ */
+int vfs_get_tree(struct fs_context *fc)
+{
+	struct super_block *sb;
+	int ret;
+
+	if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source)
+		return -ENOENT;
+
+	/* A context that already holds a root is not used again. */
+	if (fc->root)
+		return -EBUSY;
+
+	/* ->validate() is optional. */
+	if (fc->ops->validate) {
+		ret = fc->ops->validate(fc);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = security_fs_context_validate(fc);
+	if (ret < 0)
+		return ret;
+
+	/* Get the mountable root in fc->root, with a ref on the root and a ref
+	 * on the superblock.
+	 */
+	ret = fc->ops->get_tree(fc);
+	if (ret < 0)
+		return ret;
+
+	if (!fc->root) {
+		pr_err("Filesystem %s get_tree() didn't set fc->root\n",
+		       fc->fs_type->name);
+		/* We don't know what the locking state of the superblock is -
+		 * if there is a superblock.
+		 */
+		BUG();
+	}
+
+	sb = fc->root->d_sb;
+	WARN_ON(!sb->s_bdi);
+
+	ret = security_sb_get_tree(fc);
+	if (ret < 0)
+		goto err_sb;
+
+	/* Copy the subtype into the superblock unless it already has one.
+	 * ret is preset so that a failed kstrdup() reports -ENOMEM.
+	 */
+	ret = -ENOMEM;
+	if (fc->subtype && !sb->s_subtype) {
+		sb->s_subtype = kstrdup(fc->subtype, GFP_KERNEL);
+		if (!sb->s_subtype)
+			goto err_sb;
+	}
+
+	/* Write barrier is for super_cache_count(). We place it before setting
+	 * SB_BORN as the data dependency between the two functions is the
+	 * superblock structure contents that we just set up, not the SB_BORN
+	 * flag.
+	 */
+	smp_wmb();
+	sb->s_flags |= SB_BORN;
+
+	/* Filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+	 * but s_maxbytes was an unsigned long long for many releases.  Throw
+	 * this warning for a little while to try and catch filesystems that
+	 * violate this rule.
+	 */
+	WARN(sb->s_maxbytes < 0,
+	     "%s set sb->s_maxbytes to negative value (%lld)\n",
+	     fc->fs_type->name, sb->s_maxbytes);
+
+	/* NOTE(review): nothing in this function takes s_umount, so
+	 * ->get_tree() presumably returns with it held for write - confirm.
+	 */
+	up_write(&sb->s_umount);
+	return 0;
+
+err_sb:
+	/* Give back the root reference, clear it from the context and drop
+	 * the superblock.
+	 */
+	dput(fc->root);
+	fc->root = NULL;
+	deactivate_locked_super(sb);
+	return ret;
+}
+EXPORT_SYMBOL(vfs_get_tree);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 067f0e31aec7..00a24f4b2f0b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -61,6 +61,9 @@  struct workqueue_struct;
 struct iov_iter;
 struct fscrypt_info;
 struct fscrypt_operations;
+struct fs_context;
+struct fsconfig_parser;
+struct fsconfig_param;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -723,6 +726,11 @@  static inline void inode_unlock(struct inode *inode)
 	up_write(&inode->i_rwsem);
 }
 
+/*
+ * Take inode->i_rwsem for writing with down_write_killable() and pass back
+ * its result.
+ * NOTE(review): a non-zero return presumably means the wait was interrupted
+ * and the lock is not held - confirm against down_write_killable().
+ */
+static inline int inode_lock_killable(struct inode *inode)
+{
+	return down_write_killable(&inode->i_rwsem);
+}
+
 static inline void inode_lock_shared(struct inode *inode)
 {
 	down_read(&inode->i_rwsem);
@@ -1842,6 +1850,7 @@  struct super_operations {
 	int (*unfreeze_fs) (struct super_block *);
 	int (*statfs) (struct dentry *, struct kstatfs *);
 	int (*remount_fs) (struct super_block *, int *, char *, size_t);
+	int (*reconfigure) (struct super_block *, struct fs_context *);
 	void (*umount_begin) (struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct dentry *);
@@ -2098,6 +2107,8 @@  struct file_system_type {
 #define FS_HAS_SUBTYPE		4
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
+	int (*init_fs_context)(struct fs_context *, struct dentry *);
+	const struct fs_parameter_description *parameters;
 	struct dentry *(*mount) (struct file_system_type *, int,
 				 const char *, void *, size_t);
 	void (*kill_sb) (struct super_block *);
@@ -2154,8 +2165,12 @@  void kill_litter_super(struct super_block *sb);
 void deactivate_super(struct super_block *sb);
 void deactivate_locked_super(struct super_block *sb);
 int set_anon_super(struct super_block *s, void *data);
+int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
 int get_anon_bdev(dev_t *);
 void free_anon_bdev(dev_t);
+struct super_block *sget_fc(struct fs_context *fc,
+			    int (*test)(struct super_block *, struct fs_context *),
+			    int (*set)(struct super_block *, struct fs_context *));
 struct super_block *sget_userns(struct file_system_type *type,
 			int (*test)(struct super_block *,void *),
 			int (*set)(struct super_block *,void *),
@@ -2198,8 +2213,8 @@  mount_pseudo(struct file_system_type *fs_type, char *name,
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
+extern struct vfsmount *kern_mount(struct file_system_type *);
 extern struct vfsmount *kern_mount_data(struct file_system_type *, void *, size_t);
-#define kern_mount(type) kern_mount_data(type, NULL, 0)
 extern void kern_unmount(struct vfsmount *mnt);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index a88b54752f86..bbb8114f2fdc 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -25,6 +25,7 @@  struct pid_namespace;
 struct super_block;
 struct user_namespace;
 struct vfsmount;
+struct path;
 
 enum fs_context_purpose {
 	FS_CONTEXT_FOR_USER_MOUNT,	/* New superblock for user-specified mount */
@@ -33,6 +34,19 @@  enum fs_context_purpose {
 	FS_CONTEXT_FOR_RECONFIGURE,	/* Superblock reconfiguration (remount) */
 };
 
+/*
+ * Userspace usage phase for fsopen/fspick.  The current value is kept in the
+ * 8-bit 'phase' field of struct fs_context.
+ */
+enum fs_context_phase {
+	FS_CONTEXT_CREATE_PARAMS,	/* Loading params for sb creation */
+	FS_CONTEXT_CREATING,		/* A superblock is being created */
+	FS_CONTEXT_AWAITING_MOUNT,	/* Superblock created, awaiting fsmount() */
+	FS_CONTEXT_AWAITING_RECONF,	/* Awaiting initialisation for reconfiguration */
+	FS_CONTEXT_RECONF_PARAMS,	/* Loading params for reconfiguration */
+	FS_CONTEXT_RECONFIGURING,	/* Reconfiguring the superblock */
+	FS_CONTEXT_FAILED,		/* Failed to correctly transition a context */
+};
+
 /*
  * Type of parameter value.
  */
@@ -85,8 +99,10 @@  struct fs_context {
 	void			*s_fs_info;	/* Proposed s_fs_info */
 	unsigned int		sb_flags;	/* Proposed superblock flags (SB_*) */
 	enum fs_context_purpose	purpose:8;
+	enum fs_context_phase	phase:8;	/* The phase the context is in */
 	bool			sloppy:1;	/* T if unrecognised options are okay */
 	bool			silent:1;	/* T if "o silent" specified */
+	bool			need_free:1;	/* Need to call ops->free() */
 };
 
 struct fs_context_operations {
@@ -98,6 +114,35 @@  struct fs_context_operations {
 	int (*get_tree)(struct fs_context *fc);
 };
 
+/*
+ * fs_context manipulation functions.
+ */
+extern struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
+					     struct dentry *reference,
+					     unsigned int ms_flags,
+					     enum fs_context_purpose purpose);
+extern struct fs_context *vfs_sb_reconfig(struct path *path, unsigned int ms_flags);
+extern struct fs_context *vfs_dup_fs_context(struct fs_context *src);
+extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
+extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+			       const char *value, size_t v_size);
+extern int generic_parse_monolithic(struct fs_context *fc, void *data, size_t data_size);
+extern int vfs_get_tree(struct fs_context *fc);
+extern void put_fs_context(struct fs_context *fc);
+
+/*
+ * sget() wrapper to be called from the ->get_tree() op.
+ *
+ * These values are the @keying argument of vfs_get_super() and select how an
+ * existing superblock of the same filesystem type is matched, if at all.
+ */
+enum vfs_get_super_keying {
+	vfs_get_single_super,	/* Only one such superblock may exist */
+	vfs_get_keyed_super,	/* Superblocks with different s_fs_info keys may exist */
+	vfs_get_independent_super, /* Multiple independent superblocks may exist */
+};
+extern int vfs_get_super(struct fs_context *fc,
+			 enum vfs_get_super_keying keying,
+			 int (*fill_super)(struct super_block *sb,
+					   struct fs_context *fc));
+
 #define logfc(FC, FMT, ...) pr_notice(FMT, ## __VA_ARGS__)
 
 /**
diff --git a/include/linux/mount.h b/include/linux/mount.h
index c9edd284f0af..41b6b080ffd0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -21,6 +21,7 @@  struct super_block;
 struct vfsmount;
 struct dentry;
 struct mnt_namespace;
+struct fs_context;
 
 #define MNT_NOSUID	0x01
 #define MNT_NODEV	0x02
@@ -88,6 +89,8 @@  struct path;
 extern struct vfsmount *clone_private_mount(const struct path *path);
 
 struct file_system_type;
+extern struct vfsmount *vfs_create_mount(struct fs_context *fc,
+					 unsigned int mnt_flags);
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
 				      void *data, size_t data_size);