diff mbox series

[v4,05/11] NFS: add superblock sysfs entries

Message ID 095dda5e682c8367335b9fa448f2834b9435ee69.1686851158.git.bcodding@redhat.com (mailing list archive)
State New, archived
Headers show
Series NFS sysfs scaffolding | expand

Commit Message

Benjamin Coddington June 15, 2023, 6:07 p.m. UTC
Create a sysfs directory for each mount that corresponds to the mount's
nfs_server struct.  As the mount is being constructed, use the name
"server-n", but rename it to the "MAJOR:MINOR" of the mount after assigning
a device_id. The rename approach allows us to populate the mount's directory
with links to the various rpc_client objects during the mount's
construction.  The naming convention (MAJOR:MINOR) can be used to reference
a particular NFS mount's sysfs tree.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfs/client.c           | 16 +++++++++++
 fs/nfs/nfs4client.c       |  3 ++
 fs/nfs/super.c            |  6 +++-
 fs/nfs/sysfs.c            | 59 +++++++++++++++++++++++++++++++++++++++
 fs/nfs/sysfs.h            |  5 ++++
 include/linux/nfs_fs_sb.h |  2 ++
 6 files changed, 90 insertions(+), 1 deletion(-)

Comments

Nathan Chancellor June 26, 2023, 9:12 p.m. UTC | #1
Hi Benjamin,

On Thu, Jun 15, 2023 at 02:07:26PM -0400, Benjamin Coddington wrote:
> Create a sysfs directory for each mount that corresponds to the mount's
> nfs_server struct.  As the mount is being constructed, use the name
> "server-n", but rename it to the "MAJOR:MINOR" of the mount after assigning
> a device_id. The rename approach allows us to populate the mount's directory
> with links to the various rpc_client objects during the mount's
> construction.  The naming convention (MAJOR:MINOR) can be used to reference
> a particular NFS mount's sysfs tree.
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>

I am not sure if this has been reported or fixed already, so I apologize
if this is a duplicate. After this change landed in -next as commit
1c7251187dc0 ("NFS: add superblock sysfs entries"), I see the following
splat when accessing a NFS server:

[   21.180403] RPC: Registered named UNIX socket transport module.
[   21.180407] RPC: Registered udp transport module.
[   21.180407] RPC: Registered tcp transport module.
[   21.180408] RPC: Registered tcp-with-tls transport module.
[   21.180408] RPC: Registered tcp NFSv4.1 backchannel transport module.
[   21.210652] Key type dns_resolver registered
[   21.297136] NFS: Registering the id_resolver key type
[   21.297140] Key type id_resolver registered
[   21.297140] Key type id_legacy registered
[   21.322237] ------------[ cut here ]------------
[   21.322238] kobject: '(null)' (000000000804ef51): is not initialized, yet kobject_put() is being called.
[   21.322246] WARNING: CPU: 0 PID: 2501 at lib/kobject.c:728 kobject_put+0xc1/0x1d0
[   21.322251] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache netfs overlay xt_mark snd_seq_dummy snd_hrtimer snd_seq snd_seq_device tun hid_logitech_hidpp mousedev joydev xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink bridge stp llc hid_logitech_dj hid_razer snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic vfat fat snd_sof_pci_intel_tgl snd_sof_intel_hda_common snd_soc_hdac_hda snd_sof_pci snd_sof_xtensa_dsp snd_sof_intel_hda_mlink snd_sof_intel_hda intel_rapl_msr snd_sof intel_rapl_common x86_pkg_temp_thermal snd_sof_utils intel_powerclamp snd_hda_ext_core i915 snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core coretemp snd_compress eeepc_wmi kvm_intel asus_wmi snd_hda_intel i2c_algo_bit mei_hdcp mei_pxp ledtrig_audio usbhid snd_intel_dspcfg drm_buddy sparse_keymap kvm snd_hda_codec iTCO_wdt platform_profile ee1004 intel_pmc_bxt rfkill
[   21.322304]  snd_hwdep intel_gtt iTCO_vendor_support irqbypass wmi_bmof snd_hda_core crct10dif_pclmul drm_display_helper crc32_pclmul polyval_clmulni polyval_generic drm_kms_helper snd_pcm gf128mul syscopyarea ghash_clmulni_intel sysfillrect sha512_ssse3 sysimgblt snd_timer aesni_intel video crypto_simd cryptd intel_cstate intel_uncore spi_nor snd intel_lpss_pci mei_me pcspkr intel_lpss cec i2c_i801 e1000e mtd i2c_smbus mei soundcore ttm idma64 wmi acpi_pad acpi_tad mac_hid pkcs8_key_parser dm_multipath drm crypto_user fuse dm_mod loop zram bpf_preload ip_tables x_tables nvme nvme_core spi_intel_pci xhci_pci spi_intel xhci_pci_renesas nvme_common btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq
[   21.322331] CPU: 0 PID: 2501 Comm: mount.nfs Not tainted 6.4.0-rc7-debug-00022-g1c7251187dc0 #1 650a78de916f9c7b93e3e16eabf8ef901a04fe73
[   21.322333] Hardware name: ASUS System Product Name/PRIME Z590M-PLUS, BIOS 1203 10/27/2021
[   21.322334] RIP: 0010:kobject_put+0xc1/0x1d0
[   21.322347] Code: 00 4c 89 ef e8 00 c8 65 ff 48 85 db 74 9a f6 43 3c 01 0f 85 78 ff ff ff 48 8b 33 48 89 da 48 c7 c7 e0 91 a6 90 e8 7f 7c 40 ff <0f> 0b e9 5f ff ff ff c3 cc cc cc cc 4d 89 f1 49 c7 c0 d0 9b 61 90
[   21.322349] RSP: 0018:ffffa94f44e93d50 EFLAGS: 00010286
[   21.322350] RAX: 0000000000000000 RBX: ffff93d852edfc48 RCX: 0000000000000027
[   21.322351] RDX: ffff93df7f421688 RSI: 0000000000000001 RDI: ffff93df7f421680
[   21.322352] RBP: 00000000ffffffff R08: 0000000000000000 R09: ffffa94f44e93be0
[   21.322353] R10: 0000000000000003 R11: ffffffff910ca808 R12: 00000000ffffffa3
[   21.322353] R13: ffffa94f44e93d90 R14: ffff93d8a96c1000 R15: 0000000000000000
[   21.322354] FS:  00007f6f56579740(0000) GS:ffff93df7f400000(0000) knlGS:0000000000000000
[   21.322355] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   21.322356] CR2: 00007f0c3e6eaa48 CR3: 00000001793a2005 CR4: 0000000000770ef0
[   21.322357] PKRU: 55555554
[   21.322358] Call Trace:
[   21.322359]  <TASK>
[   21.322360]  ? kobject_put+0xc1/0x1d0
[   21.322362]  ? __warn+0x81/0x130
[   21.322365]  ? kobject_put+0xc1/0x1d0
[   21.322367]  ? report_bug+0x171/0x1a0
[   21.322376]  ? prb_read_valid+0x1b/0x30
[   21.322379]  ? handle_bug+0x3c/0x80
[   21.322382]  ? exc_invalid_op+0x17/0x70
[   21.322384]  ? asm_exc_invalid_op+0x1a/0x20
[   21.322387]  ? kobject_put+0xc1/0x1d0
[   21.322390]  nfs_free_server+0x6b/0xe0 [nfs 624204e86a6049542d6fb7844efc3f7da8452ecc]
[   21.322408]  nfs4_create_server+0x234/0x350 [nfsv4 c1cc88cc5dfaa7ab85ce33d7e154b7db9d5a5f0d]
[   21.322443]  nfs4_try_get_tree+0x37/0xd0 [nfsv4 c1cc88cc5dfaa7ab85ce33d7e154b7db9d5a5f0d]
[   21.322470]  vfs_get_tree+0x26/0xd0
[   21.322473]  path_mount+0x4a1/0xae0
[   21.322476]  __x64_sys_mount+0x11a/0x150
[   21.322477]  do_syscall_64+0x5d/0x90
[   21.322480]  ? syscall_exit_to_user_mode+0x1b/0x40
[   21.322482]  ? do_syscall_64+0x6c/0x90
[   21.322484]  ? exc_page_fault+0x7f/0x180
[   21.322486]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[   21.322489] RIP: 0033:0x7f6f5684f08e
[   21.322509] Code: 48 8b 0d cd ec 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 9a ec 0c 00 f7 d8 64 89 01 48
[   21.322510] RSP: 002b:00007ffc781debe8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
[   21.322512] RAX: ffffffffffffffda RBX: 00007ffc781dee10 RCX: 00007f6f5684f08e
[   21.322512] RDX: 0000560364a99a10 RSI: 0000560364a999f0 RDI: 0000560364a99880
[   21.322513] RBP: 0000560364a99da0 R08: 0000560364a99da0 R09: 0000000000000060
[   21.322514] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc781dee10
[   21.322514] R13: 0000560364a99d60 R14: 0000000000000004 R15: 0000560362f49927
[   21.322516]  </TASK>
[   21.322517] ---[ end trace 0000000000000000 ]---
[   21.322518] ------------[ cut here ]------------
[   21.322518] refcount_t: underflow; use-after-free.
[   21.322522] WARNING: CPU: 0 PID: 2501 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110
[   21.322525] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache netfs overlay xt_mark snd_seq_dummy snd_hrtimer snd_seq snd_seq_device tun hid_logitech_hidpp mousedev joydev xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink bridge stp llc hid_logitech_dj hid_razer snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic vfat fat snd_sof_pci_intel_tgl snd_sof_intel_hda_common snd_soc_hdac_hda snd_sof_pci snd_sof_xtensa_dsp snd_sof_intel_hda_mlink snd_sof_intel_hda intel_rapl_msr snd_sof intel_rapl_common x86_pkg_temp_thermal snd_sof_utils intel_powerclamp snd_hda_ext_core i915 snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core coretemp snd_compress eeepc_wmi kvm_intel asus_wmi snd_hda_intel i2c_algo_bit mei_hdcp mei_pxp ledtrig_audio usbhid snd_intel_dspcfg drm_buddy sparse_keymap kvm snd_hda_codec iTCO_wdt platform_profile ee1004 intel_pmc_bxt rfkill
[   21.322551]  snd_hwdep intel_gtt iTCO_vendor_support irqbypass wmi_bmof snd_hda_core crct10dif_pclmul drm_display_helper crc32_pclmul polyval_clmulni polyval_generic drm_kms_helper snd_pcm gf128mul syscopyarea ghash_clmulni_intel sysfillrect sha512_ssse3 sysimgblt snd_timer aesni_intel video crypto_simd cryptd intel_cstate intel_uncore spi_nor snd intel_lpss_pci mei_me pcspkr intel_lpss cec i2c_i801 e1000e mtd i2c_smbus mei soundcore ttm idma64 wmi acpi_pad acpi_tad mac_hid pkcs8_key_parser dm_multipath drm crypto_user fuse dm_mod loop zram bpf_preload ip_tables x_tables nvme nvme_core spi_intel_pci xhci_pci spi_intel xhci_pci_renesas nvme_common btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq
[   21.322573] CPU: 0 PID: 2501 Comm: mount.nfs Tainted: G        W          6.4.0-rc7-debug-00022-g1c7251187dc0 #1 650a78de916f9c7b93e3e16eabf8ef901a04fe73
[   21.322574] Hardware name: ASUS System Product Name/PRIME Z590M-PLUS, BIOS 1203 10/27/2021
[   21.322575] RIP: 0010:refcount_warn_saturate+0xbe/0x110
[   21.322576] Code: 01 01 e8 d5 e0 a9 ff 0f 0b c3 cc cc cc cc 80 3d 82 f5 7a 01 00 75 85 48 c7 c7 f0 eb a0 90 c6 05 72 f5 7a 01 01 e8 b2 e0 a9 ff <0f> 0b c3 cc cc cc cc 80 3d 60 f5 7a 01 00 0f 85 5e ff ff ff 48 c7
[   21.322577] RSP: 0018:ffffa94f44e93d78 EFLAGS: 00010286
[   21.322578] RAX: 0000000000000000 RBX: ffff93d852edf800 RCX: 0000000000000027
[   21.322579] RDX: ffff93df7f421688 RSI: 0000000000000001 RDI: ffff93df7f421680
[   21.322580] RBP: ffffffffffffffa3 R08: 0000000000000000 R09: ffffa94f44e93c08
[   21.322580] R10: 0000000000000003 R11: ffffffff910ca808 R12: 00000000ffffffa3
[   21.322581] R13: ffffa94f44e93d90 R14: ffff93d8a96c1000 R15: 0000000000000000
[   21.322582] FS:  00007f6f56579740(0000) GS:ffff93df7f400000(0000) knlGS:0000000000000000
[   21.322583] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   21.322584] CR2: 00007f0c3e6eaa48 CR3: 00000001793a2005 CR4: 0000000000770ef0
[   21.322584] PKRU: 55555554
[   21.322585] Call Trace:
[   21.322585]  <TASK>
[   21.322586]  ? refcount_warn_saturate+0xbe/0x110
[   21.322587]  ? __warn+0x81/0x130
[   21.322589]  ? refcount_warn_saturate+0xbe/0x110
[   21.322591]  ? report_bug+0x171/0x1a0
[   21.322592]  ? prb_read_valid+0x1b/0x30
[   21.322595]  ? handle_bug+0x3c/0x80
[   21.322597]  ? exc_invalid_op+0x17/0x70
[   21.322599]  ? asm_exc_invalid_op+0x1a/0x20
[   21.322600]  ? refcount_warn_saturate+0xbe/0x110
[   21.322602]  ? refcount_warn_saturate+0xbe/0x110
[   21.322604]  nfs_free_server+0x6b/0xe0 [nfs 624204e86a6049542d6fb7844efc3f7da8452ecc]
[   21.322621]  nfs4_create_server+0x234/0x350 [nfsv4 c1cc88cc5dfaa7ab85ce33d7e154b7db9d5a5f0d]
[   21.322650]  nfs4_try_get_tree+0x37/0xd0 [nfsv4 c1cc88cc5dfaa7ab85ce33d7e154b7db9d5a5f0d]
[   21.322675]  vfs_get_tree+0x26/0xd0
[   21.322677]  path_mount+0x4a1/0xae0
[   21.322679]  __x64_sys_mount+0x11a/0x150
[   21.322681]  do_syscall_64+0x5d/0x90
[   21.322683]  ? syscall_exit_to_user_mode+0x1b/0x40
[   21.322685]  ? do_syscall_64+0x6c/0x90
[   21.322686]  ? exc_page_fault+0x7f/0x180
[   21.322688]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[   21.322691] RIP: 0033:0x7f6f5684f08e
[   21.322696] Code: 48 8b 0d cd ec 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 9a ec 0c 00 f7 d8 64 89 01 48
[   21.322697] RSP: 002b:00007ffc781debe8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
[   21.322698] RAX: ffffffffffffffda RBX: 00007ffc781dee10 RCX: 00007f6f5684f08e
[   21.322699] RDX: 0000560364a99a10 RSI: 0000560364a999f0 RDI: 0000560364a99880
[   21.322699] RBP: 0000560364a99da0 R08: 0000560364a99da0 R09: 0000000000000060
[   21.322700] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc781dee10
[   21.322701] R13: 0000560364a99d60 R14: 0000000000000004 R15: 0000560362f49927
[   21.322702]  </TASK>
[   21.322703] ---[ end trace 0000000000000000 ]---

The configuration is just Arch Linux's (run through olddefconfig), in
case it is relevant:

https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/raw/main/config

If there is any other information I can provide or patches I can test, I
am more than happy to do so.

Cheers,
Nathan
Benjamin Coddington June 26, 2023, 9:47 p.m. UTC | #2
On 26 Jun 2023, at 17:12, Nathan Chancellor wrote:

> Hi Benjamin,
>
> On Thu, Jun 15, 2023 at 02:07:26PM -0400, Benjamin Coddington wrote:
>> Create a sysfs directory for each mount that corresponds to the mount's
>> nfs_server struct.  As the mount is being constructed, use the name
>> "server-n", but rename it to the "MAJOR:MINOR" of the mount after assigning
>> a device_id. The rename approach allows us to populate the mount's directory
>> with links to the various rpc_client objects during the mount's
>> construction.  The naming convention (MAJOR:MINOR) can be used to reference
>> a particular NFS mount's sysfs tree.
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>
> I am not sure if this has been reported or fixed already, so I apologize
> if this is a duplicate. After this change landed in -next as commit
> 1c7251187dc0 ("NFS: add superblock sysfs entries"), I see the following
> splat when accessing a NFS server:

Hi Nathan, oh yes - I see there are a few paths through nfs4_init_server()
where we can exit early due to an error or duplicate client, in which case
nfs_free_server() tries to clean up the server kobject before it has been
initialized.

I think we can simply do:

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4967ac800b14..4046072663f2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1013,8 +1013,10 @@ void nfs_free_server(struct nfs_server *server)

        nfs_put_client(server->nfs_client);

-       nfs_sysfs_remove_server(server);
-       kobject_put(&server->kobj);
+       if (server->kobj.state_initialized) {
+               nfs_sysfs_remove_server(server);
+               kobject_put(&server->kobj);
+       }
        ida_free(&s_sysfs_ids, server->s_sysfs_id);

        ida_destroy(&server->lockowner_id);

Are you able to test that?  If not, totally fine - I think I should be able
to reproduce the problem and send a patch.

Ben
Nathan Chancellor June 26, 2023, 10:25 p.m. UTC | #3
On Mon, Jun 26, 2023 at 05:47:21PM -0400, Benjamin Coddington wrote:
> On 26 Jun 2023, at 17:12, Nathan Chancellor wrote:
> 
> > Hi Benjamin,
> >
> > On Thu, Jun 15, 2023 at 02:07:26PM -0400, Benjamin Coddington wrote:
> >> Create a sysfs directory for each mount that corresponds to the mount's
> >> nfs_server struct.  As the mount is being constructed, use the name
> >> "server-n", but rename it to the "MAJOR:MINOR" of the mount after assigning
> >> a device_id. The rename approach allows us to populate the mount's directory
> >> with links to the various rpc_client objects during the mount's
> >> construction.  The naming convention (MAJOR:MINOR) can be used to reference
> >> a particular NFS mount's sysfs tree.
> >>
> >> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> >
> > I am not sure if this has been reported or fixed already, so I apologize
> > if this is a duplicate. After this change landed in -next as commit
> > 1c7251187dc0 ("NFS: add superblock sysfs entries"), I see the following
> > splat when accessing a NFS server:
> 
> Hi Nathan, oh yes - I see there are a few paths through nfs4_init_server()
> where we can exit early due to an error or duplicate client, in which case
> nfs_free_server() tries to clean up the server kobject before it has been
> initialized.
> 
> I think we can simply do:
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 4967ac800b14..4046072663f2 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -1013,8 +1013,10 @@ void nfs_free_server(struct nfs_server *server)
> 
>         nfs_put_client(server->nfs_client);
> 
> -       nfs_sysfs_remove_server(server);
> -       kobject_put(&server->kobj);
> +       if (server->kobj.state_initialized) {
> +               nfs_sysfs_remove_server(server);
> +               kobject_put(&server->kobj);
> +       }
>         ida_free(&s_sysfs_ids, server->s_sysfs_id);
> 
>         ida_destroy(&server->lockowner_id);
> 
> Are you able to test that?  If not, totally fine - I think I should be able
> to reproduce the problem and send a patch.

Yes, that appears to work for me! Feel free to add

Tested-by: Nathan Chancellor <nathan@kernel.org>

to that diff if you send it along formally.

Cheers,
Nathan
diff mbox series

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f50e025ae406..9aea938e4bf2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -690,6 +690,7 @@  static int nfs_init_server(struct nfs_server *server,
 		return PTR_ERR(clp);
 
 	server->nfs_client = clp;
+	nfs_sysfs_add_server(server);
 
 	/* Initialise the client representation from the mount data */
 	server->flags = ctx->flags;
@@ -944,6 +945,8 @@  void nfs_server_remove_lists(struct nfs_server *server)
 }
 EXPORT_SYMBOL_GPL(nfs_server_remove_lists);
 
+static DEFINE_IDA(s_sysfs_ids);
+
 /*
  * Allocate and initialise a server record
  */
@@ -955,6 +958,12 @@  struct nfs_server *nfs_alloc_server(void)
 	if (!server)
 		return NULL;
 
+	server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL);
+	if (server->s_sysfs_id < 0) {
+		kfree(server);
+		return NULL;
+	}
+
 	server->client = server->client_acl = ERR_PTR(-EINVAL);
 
 	/* Zero out the NFS state stuff */
@@ -1001,6 +1010,10 @@  void nfs_free_server(struct nfs_server *server)
 
 	nfs_put_client(server->nfs_client);
 
+	nfs_sysfs_remove_server(server);
+	kobject_put(&server->kobj);
+	ida_free(&s_sysfs_ids, server->s_sysfs_id);
+
 	ida_destroy(&server->lockowner_id);
 	ida_destroy(&server->openowner_id);
 	nfs_free_iostats(server->io_stats);
@@ -1102,6 +1115,8 @@  struct nfs_server *nfs_clone_server(struct nfs_server *source,
 
 	server->fsid = fattr->fsid;
 
+	nfs_sysfs_add_server(server);
+
 	error = nfs_init_server_rpcclient(server,
 			source->client->cl_timeout,
 			flavor);
@@ -1385,6 +1400,7 @@  int __init nfs_fs_proc_init(void)
 void nfs_fs_proc_exit(void)
 {
 	remove_proc_subtree("fs/nfsfs", NULL);
+	ida_destroy(&s_sysfs_ids);
 }
 
 #endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d3051b051a56..41c75602ff76 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -18,6 +18,7 @@ 
 #include "nfs4idmap.h"
 #include "pnfs.h"
 #include "netns.h"
+#include "sysfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_CLIENT
 
@@ -947,6 +948,8 @@  static int nfs4_set_client(struct nfs_server *server,
 	set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
 
 	server->nfs_client = clp;
+	nfs_sysfs_add_server(server);
+
 	return 0;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 05ae23657527..40a866db7965 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -68,6 +68,8 @@ 
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "nfs.h"
+#include "netns.h"
+#include "sysfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
@@ -1088,6 +1090,7 @@  static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
 						 &sb->s_blocksize_bits);
 
 	nfs_super_set_maxbytes(sb, server->maxfilesize);
+	nfs_sysfs_move_server_to_sb(sb);
 	server->has_sec_mnt_opts = ctx->has_sec_mnt_opts;
 }
 
@@ -1333,13 +1336,14 @@  int nfs_get_tree_common(struct fs_context *fc)
 }
 
 /*
- * Destroy an NFS2/3 superblock
+ * Destroy an NFS superblock
  */
 void nfs_kill_super(struct super_block *s)
 {
 	struct nfs_server *server = NFS_SB(s);
 	dev_t dev = s->s_dev;
 
+	nfs_sysfs_move_sb_to_server(server);
 	generic_shutdown_super(s);
 
 	nfs_fscache_release_super_cookie(s);
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 90256a3a714e..0ff24f133a02 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -215,3 +215,62 @@  void nfs_netns_sysfs_destroy(struct nfs_net *netns)
 		netns->nfs_client = NULL;
 	}
 }
+
+static void nfs_sysfs_sb_release(struct kobject *kobj)
+{
+	/* no-op: why? see lib/kobject.c kobject_cleanup() */
+}
+
+static const void *nfs_netns_server_namespace(const struct kobject *kobj)
+{
+	return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net;
+}
+
+static struct kobj_type nfs_sb_ktype = {
+	.release = nfs_sysfs_sb_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.namespace = nfs_netns_server_namespace,
+	.child_ns_type = nfs_netns_object_child_ns_type,
+};
+
+void nfs_sysfs_add_server(struct nfs_server *server)
+{
+	int ret;
+
+	ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype,
+				&nfs_kset->kobj, "server-%d", server->s_sysfs_id);
+	if (ret < 0)
+		pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n",
+					server->s_sysfs_id, ret);
+}
+EXPORT_SYMBOL_GPL(nfs_sysfs_add_server);
+
+void nfs_sysfs_move_server_to_sb(struct super_block *s)
+{
+	struct nfs_server *server = s->s_fs_info;
+	int ret;
+
+	ret = kobject_rename(&server->kobj, s->s_id);
+	if (ret < 0)
+		pr_warn("NFS: rename sysfs %s failed (%d)\n",
+					server->kobj.name, ret);
+}
+
+void nfs_sysfs_move_sb_to_server(struct nfs_server *server)
+{
+	const char *s;
+	int ret = -ENOMEM;
+
+	s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id);
+	if (s)
+		ret = kobject_rename(&server->kobj, s);
+	if (ret < 0)
+		pr_warn("NFS: rename sysfs %s failed (%d)\n",
+					server->kobj.name, ret);
+}
+
+/* unlink, not dec-ref */
+void nfs_sysfs_remove_server(struct nfs_server *server)
+{
+	kobject_del(&server->kobj);
+}
diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h
index dc4cc9809d1b..c9f5e3677eb5 100644
--- a/fs/nfs/sysfs.h
+++ b/fs/nfs/sysfs.h
@@ -23,4 +23,9 @@  extern void nfs_sysfs_exit(void);
 void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net);
 void nfs_netns_sysfs_destroy(struct nfs_net *netns);
 
+void nfs_sysfs_add_server(struct nfs_server *s);
+void nfs_sysfs_move_server_to_sb(struct super_block *s);
+void nfs_sysfs_move_sb_to_server(struct nfs_server *s);
+void nfs_sysfs_remove_server(struct nfs_server *s);
+
 #endif
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index ea2f7e6b1b0b..9e71eb357e0b 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -183,6 +183,7 @@  struct nfs_server {
 				change_attr_type;/* Description of change attribute */
 
 	struct nfs_fsid		fsid;
+	int			s_sysfs_id;	/* sysfs dentry index */
 	__u64			maxfilesize;	/* maximum file size */
 	struct timespec64	time_delta;	/* smallest time granularity */
 	unsigned long		mount_time;	/* when this fs was mounted */
@@ -259,6 +260,7 @@  struct nfs_server {
 	/* User namespace info */
 	const struct cred	*cred;
 	bool			has_sec_mnt_opts;
+	struct kobject		kobj;
 };
 
 /* Server capabilities */