Message ID | 20250306092007.1419237-2-maninder1.s@samsung.com (mailing list archive) |
---|---|
State | Under Review |
Delegated to: | Chuck Lever |
Headers | show |
Series | [1/2] NFSD: unregister filesystem in case genl_register_family() fails | expand |
On Thu, 2025-03-06 at 14:50 +0530, Maninder Singh wrote: > As of now nfsd calls create_proc_exports_entry() at start of init_nfsd > and cleanup by remove_proc_entry() at last of exit_nfsd. > > Which causes kernel OOPs if there is race between below 2 operations: > (i) exportfs -r > (ii) mount -t nfsd none /proc/fs/nfsd > > for 5.4 kernel ARM64: > > CPU 1: > el1_irq+0xbc/0x180 > arch_counter_get_cntvct+0x14/0x18 > running_clock+0xc/0x18 > preempt_count_add+0x88/0x110 > prep_new_page+0xb0/0x220 > get_page_from_freelist+0x2d8/0x1778 > __alloc_pages_nodemask+0x15c/0xef0 > __vmalloc_node_range+0x28c/0x478 > __vmalloc_node_flags_caller+0x8c/0xb0 > kvmalloc_node+0x88/0xe0 > nfsd_init_net+0x6c/0x108 [nfsd] > ops_init+0x44/0x170 > register_pernet_operations+0x114/0x270 > register_pernet_subsys+0x34/0x50 > init_nfsd+0xa8/0x718 [nfsd] > do_one_initcall+0x54/0x2e0 > > CPU 2 : > Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 > > PC is at : exports_net_open+0x50/0x68 [nfsd] > > Call trace: > exports_net_open+0x50/0x68 [nfsd] > exports_proc_open+0x2c/0x38 [nfsd] > proc_reg_open+0xb8/0x198 > do_dentry_open+0x1c4/0x418 > vfs_open+0x38/0x48 > path_openat+0x28c/0xf18 > do_filp_open+0x70/0xe8 > do_sys_open+0x154/0x248 > > Sometimes it crashes at exports_net_open() and sometimes cache_seq_next_rcu(). > > and same is happening on latest 6.14 kernel as well: > > [ 0.000000] Linux version 6.14.0-rc5-next-20250304-dirty > ... > [ 285.455918] Unable to handle kernel paging request at virtual address 00001f4800001f48 > ... > [ 285.464902] pc : cache_seq_next_rcu+0x78/0xa4 > ... > [ 285.469695] Call trace: > [ 285.470083] cache_seq_next_rcu+0x78/0xa4 (P) > [ 285.470488] seq_read+0xe0/0x11c > [ 285.470675] proc_reg_read+0x9c/0xf0 > [ 285.470874] vfs_read+0xc4/0x2fc > [ 285.471057] ksys_read+0x6c/0xf4 > [ 285.471231] __arm64_sys_read+0x1c/0x28 > [ 285.471428] invoke_syscall+0x44/0x100 > [ 285.471633] el0_svc_common.constprop.0+0x40/0xe0 > [ 285.471870] do_el0_svc_compat+0x1c/0x34 > [ 285.472073] el0_svc_compat+0x2c/0x80 > [ 285.472265] el0t_32_sync_handler+0x90/0x140 > [ 285.472473] el0t_32_sync+0x19c/0x1a0 > [ 285.472887] Code: f9400885 93407c23 937d7c27 11000421 (f86378a3) > [ 285.473422] ---[ end trace 0000000000000000 ]--- > > It reproduced simply with below script: > while [ 1 ] > do > /exportfs -r > done & > > while [ 1 ] > do > insmod /nfsd.ko > mount -t nfsd none /proc/fs/nfsd > umount /proc/fs/nfsd > rmmod nfsd > done & > > So exporting interfaces to user space shall be done at last and > cleanup at first place. > > With change there is no Kernel OOPs. > > Co-developed-by: Shubham Rana <s9.rana@samsung.com> > Signed-off-by: Shubham Rana <s9.rana@samsung.com> > Signed-off-by: Maninder Singh <maninder1.s@samsung.com> > --- > fs/nfsd/nfsctl.c | 17 ++++++++--------- > 1 file changed, 8 insertions(+), 9 deletions(-) > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index d773481bcf10..f9763ced743d 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -2291,12 +2291,9 @@ static int __init init_nfsd(void) > if (retval) > goto out_free_pnfs; > nfsd_lockd_init(); /* lockd->nfsd callbacks */ > - retval = create_proc_exports_entry(); > - if (retval) > - goto out_free_lockd; > retval = register_pernet_subsys(&nfsd_net_ops); > if (retval < 0) > - goto out_free_exports; > + goto out_free_lockd; > retval = register_cld_notifier(); > if (retval) > goto out_free_subsys; > @@ -2307,12 +2304,17 @@ static int __init init_nfsd(void) > if (retval) > goto out_free_nfsd4; > retval = genl_register_family(&nfsd_nl_family); > + if (retval) > + goto out_free_filesystem; > + retval = create_proc_exports_entry(); > if (retval) > goto out_free_all; > nfsd_localio_ops_init(); > > return 0; > out_free_all: > + genl_unregister_family(&nfsd_nl_family); > +out_free_filesystem: > unregister_filesystem(&nfsd_fs_type); > out_free_nfsd4: > nfsd4_destroy_laundry_wq(); > @@ -2320,9 +2322,6 @@ static int __init init_nfsd(void) > unregister_cld_notifier(); > out_free_subsys: > unregister_pernet_subsys(&nfsd_net_ops); > -out_free_exports: > - remove_proc_entry("fs/nfs/exports", NULL); > - remove_proc_entry("fs/nfs", NULL); > out_free_lockd: > nfsd_lockd_shutdown(); > nfsd_drc_slab_free(); > @@ -2335,14 +2334,14 @@ static int __init init_nfsd(void) > > static void __exit exit_nfsd(void) > { > + remove_proc_entry("fs/nfs/exports", NULL); > + remove_proc_entry("fs/nfs", NULL); > genl_unregister_family(&nfsd_nl_family); > unregister_filesystem(&nfsd_fs_type); > nfsd4_destroy_laundry_wq(); > unregister_cld_notifier(); > unregister_pernet_subsys(&nfsd_net_ops); > nfsd_drc_slab_free(); > - remove_proc_entry("fs/nfs/exports", NULL); > - remove_proc_entry("fs/nfs", NULL); > nfsd_lockd_shutdown(); > nfsd4_free_slabs(); > nfsd4_exit_pnfs(); To make sure I understand, the race is that sometimes the exports interface gets created before the net namespace is set up, and then that causes GPFs when exports_net_open tries to access the nfsd_net? Thanks,
Hi, > > As of now nfsd calls create_proc_exports_entry() at start of init_nfsd > > and cleanup by remove_proc_entry() at last of exit_nfsd. > > > > Which causes kernel OOPs if there is race between below 2 operations: > > (i) exportfs -r > > (ii) mount -t nfsd none /proc/fs/nfsd > > > > for 5.4 kernel ARM64: > > > > CPU 1: > > el1_irq+0xbc/0x180 > > arch_counter_get_cntvct+0x14/0x18 > > running_clock+0xc/0x18 > > preempt_count_add+0x88/0x110 > > prep_new_page+0xb0/0x220 > > get_page_from_freelist+0x2d8/0x1778 > > __alloc_pages_nodemask+0x15c/0xef0 > > __vmalloc_node_range+0x28c/0x478 > > __vmalloc_node_flags_caller+0x8c/0xb0 > > kvmalloc_node+0x88/0xe0 > > nfsd_init_net+0x6c/0x108 [nfsd] > > ops_init+0x44/0x170 > > register_pernet_operations+0x114/0x270 > > register_pernet_subsys+0x34/0x50 > > init_nfsd+0xa8/0x718 [nfsd] > > do_one_initcall+0x54/0x2e0 > > > > CPU 2 : > > Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 > > > > PC is at : exports_net_open+0x50/0x68 [nfsd] > > > > Call trace: > > exports_net_open+0x50/0x68 [nfsd] > > exports_proc_open+0x2c/0x38 [nfsd] > > proc_reg_open+0xb8/0x198 > > do_dentry_open+0x1c4/0x418 > > vfs_open+0x38/0x48 > > path_openat+0x28c/0xf18 > > do_filp_open+0x70/0xe8 > > do_sys_open+0x154/0x248 > To make sure I understand, the race is that sometimes the exports > interface gets created before the net namespace is set up, and then > that causes GPFs when exports_net_open tries to access the nfsd_net? > Yes, Sometime at time of module init this happened as I shared state of 2 CPUs at time of crash. and sometimes it occurs when module was unloading and user space was accessing it. So I though interface to user shall be exported late during init and clean up early. But what is actual position for that I was not sure, So I moved to last at time of init and first at time of clean up. And originally at time of 4.13 kernel this cleanup was the first thing to do in exit time. but with time to fixing other issues, its position got changed. https://web.git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=027690c75e8fd91b60a634d31c4891a6e39d45bd https://web.git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=bd5ae9288d6451bd346a1b4a59d4fe7e62ba29b7 https://web.git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=6f6f84aa215f7b6665ccbb937db50860f9ec2989 Which caused this kernel OOPs I think. Thanks, Maninder Singh
On Thu, 2025-03-06 at 14:50 +0530, Maninder Singh wrote: > As of now nfsd calls create_proc_exports_entry() at start of init_nfsd > and cleanup by remove_proc_entry() at last of exit_nfsd. > > Which causes kernel OOPs if there is race between below 2 operations: > (i) exportfs -r > (ii) mount -t nfsd none /proc/fs/nfsd > > for 5.4 kernel ARM64: > > CPU 1: > el1_irq+0xbc/0x180 > arch_counter_get_cntvct+0x14/0x18 > running_clock+0xc/0x18 > preempt_count_add+0x88/0x110 > prep_new_page+0xb0/0x220 > get_page_from_freelist+0x2d8/0x1778 > __alloc_pages_nodemask+0x15c/0xef0 > __vmalloc_node_range+0x28c/0x478 > __vmalloc_node_flags_caller+0x8c/0xb0 > kvmalloc_node+0x88/0xe0 > nfsd_init_net+0x6c/0x108 [nfsd] > ops_init+0x44/0x170 > register_pernet_operations+0x114/0x270 > register_pernet_subsys+0x34/0x50 > init_nfsd+0xa8/0x718 [nfsd] > do_one_initcall+0x54/0x2e0 > > CPU 2 : > Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 > > PC is at : exports_net_open+0x50/0x68 [nfsd] > > Call trace: > exports_net_open+0x50/0x68 [nfsd] > exports_proc_open+0x2c/0x38 [nfsd] > proc_reg_open+0xb8/0x198 > do_dentry_open+0x1c4/0x418 > vfs_open+0x38/0x48 > path_openat+0x28c/0xf18 > do_filp_open+0x70/0xe8 > do_sys_open+0x154/0x248 > > Sometimes it crashes at exports_net_open() and sometimes cache_seq_next_rcu(). > > and same is happening on latest 6.14 kernel as well: > > [ 0.000000] Linux version 6.14.0-rc5-next-20250304-dirty > ... > [ 285.455918] Unable to handle kernel paging request at virtual address 00001f4800001f48 > ... > [ 285.464902] pc : cache_seq_next_rcu+0x78/0xa4 > ... > [ 285.469695] Call trace: > [ 285.470083] cache_seq_next_rcu+0x78/0xa4 (P) > [ 285.470488] seq_read+0xe0/0x11c > [ 285.470675] proc_reg_read+0x9c/0xf0 > [ 285.470874] vfs_read+0xc4/0x2fc > [ 285.471057] ksys_read+0x6c/0xf4 > [ 285.471231] __arm64_sys_read+0x1c/0x28 > [ 285.471428] invoke_syscall+0x44/0x100 > [ 285.471633] el0_svc_common.constprop.0+0x40/0xe0 > [ 285.471870] do_el0_svc_compat+0x1c/0x34 > [ 285.472073] el0_svc_compat+0x2c/0x80 > [ 285.472265] el0t_32_sync_handler+0x90/0x140 > [ 285.472473] el0t_32_sync+0x19c/0x1a0 > [ 285.472887] Code: f9400885 93407c23 937d7c27 11000421 (f86378a3) > [ 285.473422] ---[ end trace 0000000000000000 ]--- > > It reproduced simply with below script: > while [ 1 ] > do > /exportfs -r > done & > > while [ 1 ] > do > insmod /nfsd.ko > mount -t nfsd none /proc/fs/nfsd > umount /proc/fs/nfsd > rmmod nfsd > done & > > So exporting interfaces to user space shall be done at last and > cleanup at first place. > > With change there is no Kernel OOPs. > > Co-developed-by: Shubham Rana <s9.rana@samsung.com> > Signed-off-by: Shubham Rana <s9.rana@samsung.com> > Signed-off-by: Maninder Singh <maninder1.s@samsung.com> > --- > fs/nfsd/nfsctl.c | 17 ++++++++--------- > 1 file changed, 8 insertions(+), 9 deletions(-) > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index d773481bcf10..f9763ced743d 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -2291,12 +2291,9 @@ static int __init init_nfsd(void) > if (retval) > goto out_free_pnfs; > nfsd_lockd_init(); /* lockd->nfsd callbacks */ > - retval = create_proc_exports_entry(); > - if (retval) > - goto out_free_lockd; > retval = register_pernet_subsys(&nfsd_net_ops); > if (retval < 0) > - goto out_free_exports; > + goto out_free_lockd; > retval = register_cld_notifier(); > if (retval) > goto out_free_subsys; > @@ -2307,12 +2304,17 @@ static int __init init_nfsd(void) > if (retval) > goto out_free_nfsd4; > retval = genl_register_family(&nfsd_nl_family); > + if (retval) > + goto out_free_filesystem; > + retval = create_proc_exports_entry(); > if (retval) > goto out_free_all; > nfsd_localio_ops_init(); > > return 0; > out_free_all: > + genl_unregister_family(&nfsd_nl_family); > +out_free_filesystem: > unregister_filesystem(&nfsd_fs_type); > out_free_nfsd4: > nfsd4_destroy_laundry_wq(); > @@ -2320,9 +2322,6 @@ static int __init init_nfsd(void) > unregister_cld_notifier(); > out_free_subsys: > unregister_pernet_subsys(&nfsd_net_ops); > -out_free_exports: > - remove_proc_entry("fs/nfs/exports", NULL); > - remove_proc_entry("fs/nfs", NULL); > out_free_lockd: > nfsd_lockd_shutdown(); > nfsd_drc_slab_free(); > @@ -2335,14 +2334,14 @@ static int __init init_nfsd(void) > > static void __exit exit_nfsd(void) > { > + remove_proc_entry("fs/nfs/exports", NULL); > + remove_proc_entry("fs/nfs", NULL); > genl_unregister_family(&nfsd_nl_family); > unregister_filesystem(&nfsd_fs_type); > nfsd4_destroy_laundry_wq(); > unregister_cld_notifier(); > unregister_pernet_subsys(&nfsd_net_ops); > nfsd_drc_slab_free(); > - remove_proc_entry("fs/nfs/exports", NULL); > - remove_proc_entry("fs/nfs", NULL); > nfsd_lockd_shutdown(); > nfsd4_free_slabs(); > nfsd4_exit_pnfs(); Reviewed-by: Jeff Layton <jlayton@kernel.org>
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d773481bcf10..f9763ced743d 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2291,12 +2291,9 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = create_proc_exports_entry(); - if (retval) - goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_exports; + goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -2307,12 +2304,17 @@ static int __init init_nfsd(void) if (retval) goto out_free_nfsd4; retval = genl_register_family(&nfsd_nl_family); + if (retval) + goto out_free_filesystem; + retval = create_proc_exports_entry(); if (retval) goto out_free_all; nfsd_localio_ops_init(); return 0; out_free_all: + genl_unregister_family(&nfsd_nl_family); +out_free_filesystem: unregister_filesystem(&nfsd_fs_type); out_free_nfsd4: nfsd4_destroy_laundry_wq(); @@ -2320,9 +2322,6 @@ static int __init init_nfsd(void) unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_exports: - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -2335,14 +2334,14 @@ static int __init init_nfsd(void) static void __exit exit_nfsd(void) { + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs();