diff mbox series

nfsd: fix kernel crash when load nfsd in docker

Message ID 20200615071211.31326-1-lxgrxd@163.com (mailing list archive)
State New, archived
Headers show
Series nfsd: fix kernel crash when load nfsd in docker | expand

Commit Message

Luo Xiaogang June 15, 2020, 7:12 a.m. UTC
When the nfsd module is loaded inside a Docker container, the kernel crashes as follows.

The access to 'current->nsproxy->net_ns->gen->ptr[nfsd_net_id]' overflows in
nfsd_init_net().

We should use the net_ns that is being initialized in nfsd_init_net(),
not 'current->nsproxy->net_ns'.

[  939.174448] Installing knfsd (copyright (C) 1996 okir@monad.swb.de).
[  939.174533] BUG: kernel NULL pointer dereference, address: 0000000000000058
[  939.174536] #PF: supervisor write access in kernel mode
[  939.174538] #PF: error_code(0x0002) - not-present page
[  939.174540] PGD 0 P4D 0
[  939.174543] Oops: 0002 [#1] SMP PTI
[  939.174546] CPU: 0 PID: 5031 Comm: modprobe Tainted: G           O      5.3.0-51-generic #44~18.04.2-Ubuntu
[  939.174548] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  939.174562] RIP: 0010:nfsd_fill_super+0x71/0x90 [nfsd]
[  939.174565] Code: 85 c0 89 c3 74 09 89 d8 5b 41 5c 41 5d 5d c3 49 8b 7c 24 68 31 f6 48 c7 c2 70 24 9f c0 e8 97 fe ff ff 48 3d 00 f0 ff ff 77 0d <49> 89 45 58 89 d8 5b 41 5c 41 5d 5d c3 89 c3 eb cb 0f 1f 40 00 66
[  939.174567] RSP: 0018:ffffaf12850f7aa8 EFLAGS: 00010287
[  939.174569] RAX: ffff94269f29a600 RBX: 0000000000000000 RCX: 0000000000000002
[  939.174570] RDX: 0000000000000000 RSI: 0000000000000100 RDI: ffff94269f30f820
[  939.174572] RBP: ffffaf12850f7ac0 R08: ffff94269f29a620 R09: 0000000000000000
[  939.174573] R10: 0000000000000000 R11: fefefefefefefeff R12: ffff942754da4800
[  939.174575] R13: 0000000000000000 R14: ffffffffc09b94d0 R15: ffff94275b344480
[  939.174577] FS:  00007f25508ed540(0000) GS:ffff94275ba00000(0000) knlGS:0000000000000000
[  939.174579] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  939.174580] CR2: 0000000000000058 CR3: 00000000619be000 CR4: 00000000000406f0
[  939.174586] Call Trace:
[  939.174593]  vfs_get_super+0x5b/0xe0
[  939.174597]  ? vfs_parse_fs_param+0xdc/0x1c0
[  939.174608]  nfsd_fs_get_tree+0x2c/0x30 [nfsd]
[  939.174610]  vfs_get_tree+0x2a/0x100
[  939.174613]  fc_mount+0x12/0x40
[  939.174615]  vfs_kern_mount.part.31+0x76/0x90
[  939.174618]  vfs_kern_mount+0x13/0x20
[  939.174627]  nfsd_init_net+0x101/0x140 [nfsd]
[  939.174630]  ops_init+0x44/0x120
[  939.174633]  register_pernet_operations+0xed/0x200
[  939.174645]  ? trace_event_define_fields_nfsd_stateid_class+0xb3/0xb3 [nfsd]
[  939.174647]  register_pernet_subsys+0x28/0x40
[  939.174658]  init_nfsd+0x22/0xcbc [nfsd]
[  939.174661]  do_one_initcall+0x4a/0x1fa
[  939.174664]  ? _cond_resched+0x19/0x40
[  939.174667]  ? kmem_cache_alloc_trace+0x15c/0x210
[  939.174671]  do_init_module+0x5f/0x227
[  939.174674]  load_module+0x1aa4/0x2140
[  939.174678]  __do_sys_finit_module+0xfc/0x120
[  939.174681]  ? __do_sys_finit_module+0xfc/0x120
[  939.174684]  __x64_sys_finit_module+0x1a/0x20
[  939.174687]  do_syscall_64+0x5a/0x130
[  939.174690]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  939.174692] RIP: 0033:0x7f2550a3270d
[  939.174694] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 53 f7 0c 00 f7 d8 64 89 01 48
[  939.174696] RSP: 002b:00007ffd4a3d9738 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[  939.174698] RAX: ffffffffffffffda RBX: 000055d5164584e0 RCX: 00007f2550a3270d
[  939.174699] RDX: 0000000000000000 RSI: 000055d5146b7358 RDI: 0000000000000007
[  939.174701] RBP: 0000000000040000 R08: 0000000000000000 R09: 0000000000000000
[  939.174702] R10: 0000000000000007 R11: 0000000000000246 R12: 000055d5146b7358
[  939.174704] R13: 0000000000000000 R14: 000055d5164547c0 R15: 000055d5164584e0
[  939.174706] Modules linked in: nfsd(+) auth_rpcgss nfs_acl lockd grace veth xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype iptable_filter iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c bpfilter br_netfilter bridge stp llc nls_utf8 isofs vboxsf(O) aufs overlay intel_rapl_msr snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer snd joydev soundcore vboxvideo intel_rapl_common drm_vram_helper ttm drm_kms_helper drm fb_sys_fops crct10dif_pclmul crc32_pclmul syscopyarea sysfillrect ghash_clmulni_intel sysimgblt aesni_intel aes_x86_64 crypto_simd cryptd glue_helper vboxguest(O) intel_rapl_perf input_leds mac_hid serio_raw binfmt_misc sch_fq_codel cuse sunrpc parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid psmouse ahci libahci e1000 i2c_piix4 pata_acpi video
[  939.174739] CR2: 0000000000000058
[  939.174742] ---[ end trace 9fba6033f11f2b84 ]---
[  939.174752] RIP: 0010:nfsd_fill_super+0x71/0x90 [nfsd]
[  939.174754] Code: 85 c0 89 c3 74 09 89 d8 5b 41 5c 41 5d 5d c3 49 8b 7c 24 68 31 f6 48 c7 c2 70 24 9f c0 e8 97 fe ff ff 48 3d 00 f0 ff ff 77 0d <49> 89 45 58 89 d8 5b 41 5c 41 5d 5d c3 89 c3 eb cb 0f 1f 40 00 66
[  939.174755] RSP: 0018:ffffaf12850f7aa8 EFLAGS: 00010287
[  939.174757] RAX: ffff94269f29a600 RBX: 0000000000000000 RCX: 0000000000000002
[  939.174759] RDX: 0000000000000000 RSI: 0000000000000100 RDI: ffff94269f30f820
[  939.174760] RBP: ffffaf12850f7ac0 R08: ffff94269f29a620 R09: 0000000000000000
[  939.174761] R10: 0000000000000000 R11: fefefefefefefeff R12: ffff942754da4800
[  939.174763] R13: 0000000000000000 R14: ffffffffc09b94d0 R15: ffff94275b344480
[  939.174765] FS:  00007f25508ed540(0000) GS:ffff94275ba00000(0000) knlGS:0000000000000000
[  939.174766] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  939.174768] CR2: 0000000000000058 CR3: 00000000619be000 CR4: 00000000000406f0

Signed-off-by: Luo Xiaogang <lxgrxd@163.com>
---
 fs/nfsd/nfsctl.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

Comments

J. Bruce Fields June 24, 2020, 1:29 a.m. UTC | #1
On Mon, Jun 15, 2020 at 03:12:11PM +0800, Luo Xiaogang wrote:
> We load nfsd module in the docker container, kernel crash as following.
> 
> The 'current->nsproxy->net_ns->gen->ptr[nfsd_net_id]' is overflow in the
> nfsd_init_net.
> 
> We should use the net_ns which is being init in the nfsd_init_net,
> not the 'current->nsproxy->net_ns'.

Thanks!  Actually, I think my problem was that net init and exit are
just the wrong place to be doing this--I moved them to nfsd start/stop
instead.

And then that exposed the fact that I had an inode leak.

Do the following two patches help?

--b.

From 16f954bd5c481596a63271a91963bf260e2f3f46 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 23 Jun 2020 16:00:33 -0400
Subject: [PATCH 1/2] nfsd4: fix nfsdfs reference count loop

We don't drop the reference on the nfsdfs filesystem with
mntput(nn->nfsd_mnt) until nfsd_exit_net(), but that won't be called
until the nfsd module's unloaded, and we can't unload the module as long
as there's a reference on nfsdfs.  So this prevents module unloading.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c |  8 +++++++-
 fs/nfsd/nfsctl.c    | 22 ++++++++++++----------
 fs/nfsd/nfsd.h      |  3 +++
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bb3d2c32664a..cce2510b2cca 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7912,9 +7912,14 @@ nfs4_state_start_net(struct net *net)
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 	int ret;
 
-	ret = nfs4_state_create_net(net);
+	ret = get_nfsdfs(net);
 	if (ret)
 		return ret;
+	ret = nfs4_state_create_net(net);
+	if (ret) {
+		mntput(nn->nfsd_mnt);
+		return ret;
+	}
 	locks_start_grace(net, &nn->nfsd4_manager);
 	nfsd4_client_tracking_init(net);
 	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
@@ -7984,6 +7989,7 @@ nfs4_state_shutdown_net(struct net *net)
 
 	nfsd4_client_tracking_exit(net);
 	nfs4_state_destroy_net(net);
+	mntput(nn->nfsd_mnt);
 }
 
 void
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b68e96681522..cf98a81ca1ea 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1424,6 +1424,18 @@ static struct file_system_type nfsd_fs_type = {
 };
 MODULE_ALIAS_FS("nfsd");
 
+int get_nfsdfs(struct net *net)
+{
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct vfsmount *mnt;
+
+	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
+	nn->nfsd_mnt = mnt;
+	return 0;
+}
+
 #ifdef CONFIG_PROC_FS
 static int create_proc_exports_entry(void)
 {
@@ -1451,7 +1463,6 @@ unsigned int nfsd_net_id;
 static __net_init int nfsd_init_net(struct net *net)
 {
 	int retval;
-	struct vfsmount *mnt;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	retval = nfsd_export_init(net);
@@ -1478,16 +1489,8 @@ static __net_init int nfsd_init_net(struct net *net)
 	init_waitqueue_head(&nn->ntf_wq);
 	seqlock_init(&nn->boot_lock);
 
-	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
-	if (IS_ERR(mnt)) {
-		retval = PTR_ERR(mnt);
-		goto out_mount_err;
-	}
-	nn->nfsd_mnt = mnt;
 	return 0;
 
-out_mount_err:
-	nfsd_reply_cache_shutdown(nn);
 out_drc_error:
 	nfsd_idmap_shutdown(net);
 out_idmap_error:
@@ -1500,7 +1503,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	mntput(nn->nfsd_mnt);
 	nfsd_reply_cache_shutdown(nn);
 	nfsd_idmap_shutdown(net);
 	nfsd_export_shutdown(net);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 36cdd81b6688..57c832d1b30f 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -90,6 +90,8 @@ void		nfsd_destroy(struct net *net);
 
 bool		i_am_nfsd(void);
 
+int get_nfsdfs(struct net *);
+
 struct nfsdfs_client {
 	struct kref cl_ref;
 	void (*cl_release)(struct kref *kref);
@@ -100,6 +102,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
 		struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
 void nfsd_client_rmdir(struct dentry *dentry);
 
+
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 #ifdef CONFIG_NFSD_V2_ACL
 extern const struct svc_version nfsd_acl_version2;
Luo Xiaogang June 26, 2020, 11:35 a.m. UTC | #2
Thanks for the reply. The two patches you provided solved this problem.
Luo Xiaogang June 26, 2020, 12:45 p.m. UTC | #3
At 2020-06-24 09:29:01, "J. Bruce Fields" <bfields@fieldses.org> wrote:
>On Mon, Jun 15, 2020 at 03:12:11PM +0800, Luo Xiaogang wrote:
>> We load nfsd module in the docker container, kernel crash as following.
>> 
>> The 'current->nsproxy->net_ns->gen->ptr[nfsd_net_id]' is overflow in the
>> nfsd_init_net.
>> 
>> We should use the net_ns which is being init in the nfsd_init_net,
>> not the 'current->nsproxy->net_ns'.
>
>Thanks!  Actually, I think my problem was that net init and exit are
>just the wrong place to be doing this--I moved them to nfsd start/stop
>instead.
>
>And then that exposed the fact that I had an inode leak.
>
>Do the following two patches help?

I just tested it on Ubuntu 18.04 with Docker 19.03.6, using the ubuntu:18.04 Docker image.

Your patchset helps; here is my Reported-and-tested-by. Thanks very much.

Reported-and-Tested-by:  Luo Xiaogang <lxgrxd@163.com>


>--b.
>
>From 16f954bd5c481596a63271a91963bf260e2f3f46 Mon Sep 17 00:00:00 2001
>From: "J. Bruce Fields" <bfields@redhat.com>
>Date: Tue, 23 Jun 2020 16:00:33 -0400
>Subject: [PATCH 1/2] nfsd4: fix nfsdfs reference count loop
>
>We don't drop the reference on the nfsdfs filesystem with
>mntput(nn->nfsd_mnt) until nfsd_exit_net(), but that won't be called
>until the nfsd module's unloaded, and we can't unload the module as long
>as there's a reference on nfsdfs.  So this prevents module unloading.
>
>Signed-off-by: J. Bruce Fields <bfields@redhat.com>
>---
> fs/nfsd/nfs4state.c |  8 +++++++-
> fs/nfsd/nfsctl.c    | 22 ++++++++++++----------
> fs/nfsd/nfsd.h      |  3 +++
> 3 files changed, 22 insertions(+), 11 deletions(-)
>
>diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>index bb3d2c32664a..cce2510b2cca 100644
>--- a/fs/nfsd/nfs4state.c
>+++ b/fs/nfsd/nfs4state.c
>@@ -7912,9 +7912,14 @@ nfs4_state_start_net(struct net *net)
> 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> 	int ret;
> 
>-	ret = nfs4_state_create_net(net);
>+	ret = get_nfsdfs(net);
> 	if (ret)
> 		return ret;
>+	ret = nfs4_state_create_net(net);
>+	if (ret) {
>+		mntput(nn->nfsd_mnt);
>+		return ret;
>+	}
> 	locks_start_grace(net, &nn->nfsd4_manager);
> 	nfsd4_client_tracking_init(net);
> 	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
>@@ -7984,6 +7989,7 @@ nfs4_state_shutdown_net(struct net *net)
> 
> 	nfsd4_client_tracking_exit(net);
> 	nfs4_state_destroy_net(net);
>+	mntput(nn->nfsd_mnt);
> }
> 
> void
>diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
>index b68e96681522..cf98a81ca1ea 100644
>--- a/fs/nfsd/nfsctl.c
>+++ b/fs/nfsd/nfsctl.c
>@@ -1424,6 +1424,18 @@ static struct file_system_type nfsd_fs_type = {
> };
> MODULE_ALIAS_FS("nfsd");
> 
>+int get_nfsdfs(struct net *net)
>+{
>+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>+	struct vfsmount *mnt;
>+
>+	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
>+	if (IS_ERR(mnt))
>+		return PTR_ERR(mnt);
>+	nn->nfsd_mnt = mnt;
>+	return 0;
>+}
>+
> #ifdef CONFIG_PROC_FS
> static int create_proc_exports_entry(void)
> {
>@@ -1451,7 +1463,6 @@ unsigned int nfsd_net_id;
> static __net_init int nfsd_init_net(struct net *net)
> {
> 	int retval;
>-	struct vfsmount *mnt;
> 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> 
> 	retval = nfsd_export_init(net);
>@@ -1478,16 +1489,8 @@ static __net_init int nfsd_init_net(struct net *net)
> 	init_waitqueue_head(&nn->ntf_wq);
> 	seqlock_init(&nn->boot_lock);
> 
>-	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
>-	if (IS_ERR(mnt)) {
>-		retval = PTR_ERR(mnt);
>-		goto out_mount_err;
>-	}
>-	nn->nfsd_mnt = mnt;
> 	return 0;
> 
>-out_mount_err:
>-	nfsd_reply_cache_shutdown(nn);
> out_drc_error:
> 	nfsd_idmap_shutdown(net);
> out_idmap_error:
>@@ -1500,7 +1503,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
> {
> 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> 
>-	mntput(nn->nfsd_mnt);
> 	nfsd_reply_cache_shutdown(nn);
> 	nfsd_idmap_shutdown(net);
> 	nfsd_export_shutdown(net);
>diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>index 36cdd81b6688..57c832d1b30f 100644
>--- a/fs/nfsd/nfsd.h
>+++ b/fs/nfsd/nfsd.h
>@@ -90,6 +90,8 @@ void		nfsd_destroy(struct net *net);
> 
> bool		i_am_nfsd(void);
> 
>+int get_nfsdfs(struct net *);
>+
> struct nfsdfs_client {
> 	struct kref cl_ref;
> 	void (*cl_release)(struct kref *kref);
>@@ -100,6 +102,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
> 		struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
> void nfsd_client_rmdir(struct dentry *dentry);
> 
>+
> #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
> #ifdef CONFIG_NFSD_V2_ACL
> extern const struct svc_version nfsd_acl_version2;
>-- 
>2.26.2
>
>
>From 51de3b460b39e862f7dcfd4d600e8de0afe73e29 Mon Sep 17 00:00:00 2001
>From: "J. Bruce Fields" <bfields@redhat.com>
>Date: Tue, 23 Jun 2020 21:01:19 -0400
>Subject: [PATCH 2/2] nfsd: fix nfsdfs inode reference count leak
>
>I don't understand this code well, but  I'm seeing a warning about a
>still-referenced inode on unmount, and every other similar filesystem
>does a dput() here.
>
>Signed-off-by: J. Bruce Fields <bfields@redhat.com>
>---
> fs/nfsd/nfsctl.c | 1 +
> 1 file changed, 1 insertion(+)
>
>diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
>index cf98a81ca1ea..cd05732f8eaa 100644
>--- a/fs/nfsd/nfsctl.c
>+++ b/fs/nfsd/nfsctl.c
>@@ -1335,6 +1335,7 @@ void nfsd_client_rmdir(struct dentry *dentry)
> 	WARN_ON_ONCE(ret);
> 	fsnotify_rmdir(dir, dentry);
> 	d_delete(dentry);
>+	dput(dentry);
> 	inode_unlock(dir);
> }
> 
>-- 
>2.26.2
J. Bruce Fields June 26, 2020, 3:04 p.m. UTC | #4
On Fri, Jun 26, 2020 at 08:45:23PM +0800, Luo Xiaogang wrote:
> At 2020-06-24 09:29:01, "J. Bruce Fields" <bfields@fieldses.org> wrote:
> >On Mon, Jun 15, 2020 at 03:12:11PM +0800, Luo Xiaogang wrote:
> >> We load nfsd module in the docker container, kernel crash as following.
> >> 
> >> The 'current->nsproxy->net_ns->gen->ptr[nfsd_net_id]' is overflow in the
> >> nfsd_init_net.
> >> 
> >> We should use the net_ns which is being init in the nfsd_init_net,
> >> not the 'current->nsproxy->net_ns'.
> >
> >Thanks!  Actually, I think my problem was that net init and exit are
> >just the wrong place to be doing this--I moved them to nfsd start/stop
> >instead.
> >
> >And then that exposed the fact that I had an inode leak.
> >
> >Do the following two patches help?
> 
> Just test it on Ubuntu 18.04 + Docker 19.03.6, and the docker image is ubuntu:18.04.
> 
> Your patchset helps, here is my reported-and-tested-by, Thanks very much.
> 
> Reported-and-Tested-by:  Luo Xiaogang <lxgrxd@163.com>

Thank you!

--b.

> 
> 
> >--b.
> >
> >From 16f954bd5c481596a63271a91963bf260e2f3f46 Mon Sep 17 00:00:00 2001
> >From: "J. Bruce Fields" <bfields@redhat.com>
> >Date: Tue, 23 Jun 2020 16:00:33 -0400
> >Subject: [PATCH 1/2] nfsd4: fix nfsdfs reference count loop
> >
> >We don't drop the reference on the nfsdfs filesystem with
> >mntput(nn->nfsd_mnt) until nfsd_exit_net(), but that won't be called
> >until the nfsd module's unloaded, and we can't unload the module as long
> >as there's a reference on nfsdfs.  So this prevents module unloading.
> >
> >Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> >---
> > fs/nfsd/nfs4state.c |  8 +++++++-
> > fs/nfsd/nfsctl.c    | 22 ++++++++++++----------
> > fs/nfsd/nfsd.h      |  3 +++
> > 3 files changed, 22 insertions(+), 11 deletions(-)
> >
> >diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >index bb3d2c32664a..cce2510b2cca 100644
> >--- a/fs/nfsd/nfs4state.c
> >+++ b/fs/nfsd/nfs4state.c
> >@@ -7912,9 +7912,14 @@ nfs4_state_start_net(struct net *net)
> > 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > 	int ret;
> > 
> >-	ret = nfs4_state_create_net(net);
> >+	ret = get_nfsdfs(net);
> > 	if (ret)
> > 		return ret;
> >+	ret = nfs4_state_create_net(net);
> >+	if (ret) {
> >+		mntput(nn->nfsd_mnt);
> >+		return ret;
> >+	}
> > 	locks_start_grace(net, &nn->nfsd4_manager);
> > 	nfsd4_client_tracking_init(net);
> > 	if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
> >@@ -7984,6 +7989,7 @@ nfs4_state_shutdown_net(struct net *net)
> > 
> > 	nfsd4_client_tracking_exit(net);
> > 	nfs4_state_destroy_net(net);
> >+	mntput(nn->nfsd_mnt);
> > }
> > 
> > void
> >diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> >index b68e96681522..cf98a81ca1ea 100644
> >--- a/fs/nfsd/nfsctl.c
> >+++ b/fs/nfsd/nfsctl.c
> >@@ -1424,6 +1424,18 @@ static struct file_system_type nfsd_fs_type = {
> > };
> > MODULE_ALIAS_FS("nfsd");
> > 
> >+int get_nfsdfs(struct net *net)
> >+{
> >+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> >+	struct vfsmount *mnt;
> >+
> >+	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
> >+	if (IS_ERR(mnt))
> >+		return PTR_ERR(mnt);
> >+	nn->nfsd_mnt = mnt;
> >+	return 0;
> >+}
> >+
> > #ifdef CONFIG_PROC_FS
> > static int create_proc_exports_entry(void)
> > {
> >@@ -1451,7 +1463,6 @@ unsigned int nfsd_net_id;
> > static __net_init int nfsd_init_net(struct net *net)
> > {
> > 	int retval;
> >-	struct vfsmount *mnt;
> > 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > 
> > 	retval = nfsd_export_init(net);
> >@@ -1478,16 +1489,8 @@ static __net_init int nfsd_init_net(struct net *net)
> > 	init_waitqueue_head(&nn->ntf_wq);
> > 	seqlock_init(&nn->boot_lock);
> > 
> >-	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
> >-	if (IS_ERR(mnt)) {
> >-		retval = PTR_ERR(mnt);
> >-		goto out_mount_err;
> >-	}
> >-	nn->nfsd_mnt = mnt;
> > 	return 0;
> > 
> >-out_mount_err:
> >-	nfsd_reply_cache_shutdown(nn);
> > out_drc_error:
> > 	nfsd_idmap_shutdown(net);
> > out_idmap_error:
> >@@ -1500,7 +1503,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
> > {
> > 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > 
> >-	mntput(nn->nfsd_mnt);
> > 	nfsd_reply_cache_shutdown(nn);
> > 	nfsd_idmap_shutdown(net);
> > 	nfsd_export_shutdown(net);
> >diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> >index 36cdd81b6688..57c832d1b30f 100644
> >--- a/fs/nfsd/nfsd.h
> >+++ b/fs/nfsd/nfsd.h
> >@@ -90,6 +90,8 @@ void		nfsd_destroy(struct net *net);
> > 
> > bool		i_am_nfsd(void);
> > 
> >+int get_nfsdfs(struct net *);
> >+
> > struct nfsdfs_client {
> > 	struct kref cl_ref;
> > 	void (*cl_release)(struct kref *kref);
> >@@ -100,6 +102,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
> > 		struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
> > void nfsd_client_rmdir(struct dentry *dentry);
> > 
> >+
> > #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
> > #ifdef CONFIG_NFSD_V2_ACL
> > extern const struct svc_version nfsd_acl_version2;
> >-- 
> >2.26.2
> >
> >
> >From 51de3b460b39e862f7dcfd4d600e8de0afe73e29 Mon Sep 17 00:00:00 2001
> >From: "J. Bruce Fields" <bfields@redhat.com>
> >Date: Tue, 23 Jun 2020 21:01:19 -0400
> >Subject: [PATCH 2/2] nfsd: fix nfsdfs inode reference count leak
> >
> >I don't understand this code well, but  I'm seeing a warning about a
> >still-referenced inode on unmount, and every other similar filesystem
> >does a dput() here.
> >
> >Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> >---
> > fs/nfsd/nfsctl.c | 1 +
> > 1 file changed, 1 insertion(+)
> >
> >diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> >index cf98a81ca1ea..cd05732f8eaa 100644
> >--- a/fs/nfsd/nfsctl.c
> >+++ b/fs/nfsd/nfsctl.c
> >@@ -1335,6 +1335,7 @@ void nfsd_client_rmdir(struct dentry *dentry)
> > 	WARN_ON_ONCE(ret);
> > 	fsnotify_rmdir(dir, dentry);
> > 	d_delete(dentry);
> >+	dput(dentry);
> > 	inode_unlock(dir);
> > }
> > 
> >-- 
> >2.26.2
diff mbox series

Patch

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b68e96681522..87bb348a05ed 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1340,8 +1340,7 @@  void nfsd_client_rmdir(struct dentry *dentry)
 
 static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
 {
-	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
-							nfsd_net_id);
+	struct nfsd_net *nn = net_generic(fc->net_ns, nfsd_net_id);
 	struct dentry *dentry;
 	int ret;
 
@@ -1395,15 +1394,25 @@  static void nfsd_fs_free_fc(struct fs_context *fc)
 		put_net(fc->s_fs_info);
 }
 
+static int nfsd_fs_parse_monolithic(struct fs_context *fc, void *data)
+{
+	put_net(fc->net_ns);
+	fc->net_ns = get_net(data);
+
+	put_user_ns(fc->user_ns);
+	fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+
+	return 0;
+}
+
 static const struct fs_context_operations nfsd_fs_context_ops = {
 	.free		= nfsd_fs_free_fc,
 	.get_tree	= nfsd_fs_get_tree,
+	.parse_monolithic = nfsd_fs_parse_monolithic,
 };
 
 static int nfsd_init_fs_context(struct fs_context *fc)
 {
-	put_user_ns(fc->user_ns);
-	fc->user_ns = get_user_ns(fc->net_ns->user_ns);
 	fc->ops = &nfsd_fs_context_ops;
 	return 0;
 }
@@ -1478,7 +1487,7 @@  static __net_init int nfsd_init_net(struct net *net)
 	init_waitqueue_head(&nn->ntf_wq);
 	seqlock_init(&nn->boot_lock);
 
-	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+	mnt =  vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", net);
 	if (IS_ERR(mnt)) {
 		retval = PTR_ERR(mnt);
 		goto out_mount_err;