diff mbox series

[v3,2/2] NFSv4.1: Use the nfs_client's rpc timeouts for backchannel

Message ID 90c9365ad91e1eb0058b170fb332ea70ad554d8b.1702496910.git.bcodding@redhat.com (mailing list archive)
State New
Headers show
Series [v3,1/2] SUNRPC: Fixup v4.1 backchannel request timeouts | expand

Commit Message

Benjamin Coddington Dec. 13, 2023, 7:49 p.m. UTC
For backchannel requests that lookup the appropriate nfs_client, use the
state-management rpc_clnt's rpc_timeout parameters for the backchannel's
response.  When the nfs_client cannot be found, fall back to using the
xprt's default timeout parameters.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfs/callback_proc.c         | 3 +++
 fs/nfs/callback_xdr.c          | 3 +++
 include/linux/sunrpc/bc_xprt.h | 3 ++-
 include/linux/sunrpc/sched.h   | 3 ++-
 include/linux/sunrpc/svc.h     | 1 +
 net/sunrpc/clnt.c              | 8 ++++++--
 net/sunrpc/svc.c               | 6 +++++-
 net/sunrpc/xprt.c              | 5 +++--
 8 files changed, 25 insertions(+), 7 deletions(-)

Comments

Anna Schumaker Jan. 3, 2024, 9:45 p.m. UTC | #1
Hi Ben,

On Wed, Dec 13, 2023 at 2:49 PM Benjamin Coddington <bcodding@redhat.com> wrote:
>
> For backchannel requests that lookup the appropriate nfs_client, use the
> state-management rpc_clnt's rpc_timeout parameters for the backchannel's
> response.  When the nfs_client cannot be found, fall back to using the
> xprt's default timeout parameters.

I'm seeing a use-after-free after applying this patch when using pNFS
and session trunking. Any idea what's going on? Here is the stack
trace I'm seeing:

[    9.935256] nfs4filelayout_init: NFSv4 File Layout Driver Registering...
[   10.914810] NFS:  192.168.122.42: Session trunking succeeded for
192.168.122.44
[   11.864302] ------------[ cut here ]------------
[   11.864305] refcount_t: addition on 0; use-after-free.
[   11.864320] WARNING: CPU: 0 PID: 742 at lib/refcount.c:25
refcount_warn_saturate+0x82/0x120
[   11.864333] Modules linked in: nfs_layout_nfsv41_files
rpcsec_gss_krb5 nfsv4 nfs fscache netfs rpcrdma rdma_cm iw_cm ib_cm
ib_cor
e cfg80211 rfkill 8021q garp stp mrp llc intel_rapl_msr
intel_rapl_common intel_uncore_frequency_common kvm_intel kvm
snd_hda_codec_
generic irqbypass crct10dif_pclmul crc32_pclmul polyval_clmulni
snd_hda_intel polyval_generic snd_intel_dspcfg gf128mul snd_hda_code
c ghash_clmulni_intel sha512_ssse3 snd_hwdep sha256_ssse3 sha1_ssse3
vfat iTCO_wdt snd_hda_core aesni_intel fat joydev snd_pcm mouse
dev crypto_simd intel_pmc_bxt cryptd snd_timer iTCO_vendor_support
usbhid rapl snd psmouse pcspkr i2c_i801 soundcore lpc_ich i2c_smb
us mac_hid nfsd nfs_acl auth_rpcgss lockd grace usbip_host usbip_core
loop fuse sunrpc dm_mod nfnetlink qemu_fw_cfg ip_tables x_tabl
es xfs libcrc32c crc32c_generic virtio_net virtio_gpu virtio_balloon
net_failover virtio_blk failover virtio_dma_buf serio_raw virti
o_console atkbd virtio_scsi libps2 virtio_rng vivaldi_fmap
crc32c_intel i8042 intel_agp virtio_pci intel_gtt
[   11.864383]  virtio_pci_legacy_dev xhci_pci serio xhci_pci_renesas
virtio_pci_modern_dev
[   11.864387] CPU: 0 PID: 742 Comm: umount.nfs Not tainted
6.7.0-rc8-g4f79a1eeea4b+ #41661
749ab84c1af8a7111125e804fccf9cd536b09ccc
[   11.864390] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
BIOS unknown 2/2/2022
[   11.864391] RIP: 0010:refcount_warn_saturate+0x82/0x120
[   11.864393] Code: 89 e8 32 33 a3 ff 0f 0b c3 cc cc cc cc cc 80 3d
44 b4 52 01 00 75 c0 c6 05 3b b4 52 01 01 48 c7 c7 61 ad 7d 89
e8 0e 33 a3 ff <0f> 0b c3 cc cc cc cc cc 80 3d 21 b4 52 01 00 75 9c c6
05 18 b4 52
[   11.864394] RSP: 0018:ffffa677c0b97ca8 EFLAGS: 00010246
[   11.864395] RAX: bd28f387bfec6500 RBX: ffff99e884324e00 RCX: 0000000000000027
[   11.864397] RDX: ffffa677c0b97b70 RSI: 00000000ffffefff RDI: ffff99e8fbc21748
[   11.864398] RBP: 0000000000000000 R08: 0000000000000fff R09: ffffffff89a577e0
[   11.864399] R10: 0000000000002ffd R11: 0000000000000004 R12: ffff99e88c09a000
[   11.864400] R13: 0000000000000000 R14: ffffa677c0b97cf0 R15: ffff99e884324e00
[   11.864401] FS:  00007f0f757e0740(0000) GS:ffff99e8fbc00000(0000)
knlGS:0000000000000000
[   11.864402] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   11.864403] CR2: 000055b5ebbfdf70 CR3: 000000010c23e000 CR4: 0000000000750ef0
[   11.864406] PKRU: 55555554
[   11.864407] Call Trace:
[   11.864409]  <TASK>
[   11.864411]  ? __warn+0xc8/0x1c0
[   11.864413]  ? refcount_warn_saturate+0x82/0x120
[   11.864415]  ? report_bug+0x14e/0x1f0
[   11.864418]  ? handle_bug+0x42/0x70
[   11.864420]  ? exc_invalid_op+0x1a/0x50
[   11.864421]  ? asm_exc_invalid_op+0x1a/0x20
[   11.864424]  ? refcount_warn_saturate+0x82/0x120
[   11.864425]  ? refcount_warn_saturate+0x82/0x120
[   11.864427]  rpc_run_task+0x189/0x190 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.864465]  rpc_call_sync+0x76/0xd0 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.864494]  nfs4_proc_destroy_session+0x5f/0x110 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.864529]  nfs4_destroy_session+0x29/0x110 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.864561]  nfs41_shutdown_client+0xdc/0x100 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.864587]  nfs4_free_client+0x2e/0xb0 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.864612]  nfs_free_server+0x55/0xe0 [nfs
29da860307c99416b772706e6b567729d002ddce]
[   11.864638]  deactivate_locked_super+0x38/0x100
[   11.864641]  cleanup_mnt+0xfd/0x150
[   11.864643]  task_work_run+0x86/0xb0
[   11.864646]  exit_to_user_mode_loop+0xc8/0x110
[   11.864648]  exit_to_user_mode_prepare+0x73/0xe0
[   11.864650]  syscall_exit_to_user_mode+0x31/0x1b0
[   11.864651]  do_syscall_64+0x77/0x100
[   11.864654]  ? syscall_exit_to_user_mode+0x31/0x1b0
[   11.864655]  ? do_syscall_64+0x77/0x100
[   11.864657]  ? exc_page_fault+0x7a/0x1b0
[   11.864658]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
[   11.864660] RIP: 0033:0x7f0f759114fb
[   11.864663] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3
0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6
00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 01 68
0c 00 f7 d8
[   11.864664] RSP: 002b:00007fff8bb7e358 EFLAGS: 00000246 ORIG_RAX:
00000000000000a6
[   11.864665] RAX: 0000000000000000 RBX: 000055f3424bbfc0 RCX: 00007f0f759114fb
[   11.864666] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000055f3424bbfc0
[   11.864667] RBP: 000055f3424bbf90 R08: 00000000ffffffff R09: 0000000000000020
[   11.864668] R10: 00007f0f7580ddd0 R11: 0000000000000246 R12: 0000000000000000
[   11.864669] R13: 00007fff8bb7e568 R14: 00007f0f75bc8000 R15: 000055f341da97d8
[   11.864670]  </TASK>
[   11.864671] ---[ end trace 0000000000000000 ]---
[   11.864675] BUG: kernel NULL pointer dereference, address: 0000000000000018
[   11.865152] #PF: supervisor read access in kernel mode
[   11.865483] #PF: error_code(0x0000) - not-present page
[   11.865811] PGD 0 P4D 0
[   11.865983] Oops: 0000 [#1] PREEMPT SMP NOPTI
[   11.866275] CPU: 0 PID: 742 Comm: umount.nfs Tainted: G        W
      6.7.0-rc8-g4f79a1eeea4b+ #41661 749ab84c1af8a7111125e80
4fccf9cd536b09ccc
[   11.867085] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
BIOS unknown 2/2/2022
[   11.867587] RIP: 0010:rpcauth_refreshcred+0x133/0x2b0 [sunrpc]
[   11.867987] Code: 8b 70 50 48 c7 c7 40 15 a1 89 e8 08 6e 8f c7 48
89 44 24 18 48 c7 44 24 20 00 00 00 00 8b 83 d8 00 00 00 01 c0
83 e0 02 09 c5 <49> 8b 46 18 4c 8b 58 30 48 8d 74 24 18 4c 89 f7 89 ea
41 ff d3 0f
[   11.869142] RSP: 0018:ffffa677c0b97bc8 EFLAGS: 00010246
[   11.869479] RAX: 0000000000000000 RBX: ffff99e884324e00 RCX: 0000000000000005
[   11.869933] RDX: 000000000000d380 RSI: 000000000000464f RDI: ffff99e88d6ca0c0
[   11.870383] RBP: 0000000000000000 R08: ffff99e88c09a018 R09: ffffffff89a577e0
[   11.870839] R10: 0000000000002ffd R11: ffffffffc07dc330 R12: ffff99e88c09a600
[   11.871293] R13: ffffffffc082d8d0 R14: 0000000000000000 R15: ffffffffc07dc330
[   11.871730] FS:  00007f0f757e0740(0000) GS:ffff99e8fbc00000(0000)
knlGS:0000000000000000
[   11.872012] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   11.872221] CR2: 0000000000000018 CR3: 000000010c23e000 CR4: 0000000000750ef0
[   11.872467] PKRU: 55555554
[   11.872565] Call Trace:
[   11.872656]  <TASK>
[   11.872736]  ? __die_body+0x68/0xb0
[   11.872862]  ? page_fault_oops+0x3a6/0x400
[   11.873006]  ? exc_page_fault+0x7a/0x1b0
[   11.873144]  ? asm_exc_page_fault+0x26/0x30
[   11.873302]  ? __pfx_call_refresh+0x10/0x10 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.873620]  ? __pfx_call_refresh+0x10/0x10 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.873945]  ? rpcauth_refreshcred+0x133/0x2b0 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.874280]  ? rpcauth_refreshcred+0x118/0x2b0 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.874596]  ? _raw_spin_unlock+0xe/0x30
[   11.874734]  ? __pfx_call_refresh+0x10/0x10 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.875042]  ? __pfx_rpc_exit_task+0x10/0x10 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.875352]  __rpc_execute+0xfe/0x4b0 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.875642]  rpc_execute+0xa8/0x150 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.875926]  rpc_run_task+0x165/0x190 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.876236]  rpc_call_sync+0x76/0xd0 [sunrpc
6793b05ff35aa080c42f3ac953d266676d3f5305]
[   11.876525]  nfs4_proc_destroy_session+0x5f/0x110 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.876853]  nfs4_destroy_session+0x29/0x110 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.877164]  nfs41_shutdown_client+0xdc/0x100 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.877493]  nfs4_free_client+0x2e/0xb0 [nfsv4
40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
[   11.877790]  nfs_free_server+0x55/0xe0 [nfs
29da860307c99416b772706e6b567729d002ddce]
[   11.878200]  deactivate_locked_super+0x38/0x100
[   11.878370]  cleanup_mnt+0xfd/0x150
[   11.878496]  task_work_run+0x86/0xb0
[   11.878629]  exit_to_user_mode_loop+0xc8/0x110
[   11.878785]  exit_to_user_mode_prepare+0x73/0xe0
[   11.879121]  syscall_exit_to_user_mode+0x31/0x1b0
[   11.879451]  do_syscall_64+0x77/0x100
[   11.879746]  ? syscall_exit_to_user_mode+0x31/0x1b0
[   11.880076]  ? do_syscall_64+0x77/0x100
[   11.880370]  ? exc_page_fault+0x7a/0x1b0
[   11.880683]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
[   11.881031] RIP: 0033:0x7f0f759114fb
[   11.881321] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3
0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6
00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 01 68
0c 00 f7 d8
[   11.882267] RSP: 002b:00007fff8bb7e358 EFLAGS: 00000246 ORIG_RAX:
00000000000000a6
[   11.882691] RAX: 0000000000000000 RBX: 000055f3424bbfc0 RCX: 00007f0f759114fb
[   11.883121] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000055f3424bbfc0
[   11.883550] RBP: 000055f3424bbf90 R08: 00000000ffffffff R09: 0000000000000020
[   11.883987] R10: 00007f0f7580ddd0 R11: 0000000000000246 R12: 0000000000000000
[   11.884429] R13: 00007fff8bb7e568 R14: 00007f0f75bc8000 R15: 000055f341da97d8
[   11.884873]  </TASK>
[   11.885132] Modules linked in: nfs_layout_nfsv41_files
rpcsec_gss_krb5 nfsv4 nfs fscache netfs rpcrdma rdma_cm iw_cm ib_cm
ib_cor
e cfg80211 rfkill 8021q garp stp mrp llc intel_rapl_msr
intel_rapl_common intel_uncore_frequency_common kvm_intel kvm
snd_hda_codec_
generic irqbypass crct10dif_pclmul crc32_pclmul polyval_clmulni
snd_hda_intel polyval_generic snd_intel_dspcfg gf128mul snd_hda_code
c ghash_clmulni_intel sha512_ssse3 snd_hwdep sha256_ssse3 sha1_ssse3
vfat iTCO_wdt snd_hda_core aesni_intel fat joydev snd_pcm mouse
dev crypto_simd intel_pmc_bxt cryptd snd_timer iTCO_vendor_support
usbhid rapl snd psmouse pcspkr i2c_i801 soundcore lpc_ich i2c_smb
us mac_hid nfsd nfs_acl auth_rpcgss lockd grace usbip_host usbip_core
loop fuse sunrpc dm_mod nfnetlink qemu_fw_cfg ip_tables x_tabl
es xfs libcrc32c crc32c_generic virtio_net virtio_gpu virtio_balloon
net_failover virtio_blk failover virtio_dma_buf serio_raw virti
o_console atkbd virtio_scsi libps2 virtio_rng vivaldi_fmap
crc32c_intel i8042 intel_agp virtio_pci intel_gtt
[   11.885172]  virtio_pci_legacy_dev xhci_pci serio xhci_pci_renesas
virtio_pci_modern_dev
[   11.890142] CR2: 0000000000000018
[   11.890468] ---[ end trace 0000000000000000 ]---

I hit this while testing against ontap, if that helps.

Thanks,
Anna

>
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/nfs/callback_proc.c         | 3 +++
>  fs/nfs/callback_xdr.c          | 3 +++
>  include/linux/sunrpc/bc_xprt.h | 3 ++-
>  include/linux/sunrpc/sched.h   | 3 ++-
>  include/linux/sunrpc/svc.h     | 1 +
>  net/sunrpc/clnt.c              | 8 ++++++--
>  net/sunrpc/svc.c               | 6 +++++-
>  net/sunrpc/xprt.c              | 5 +++--
>  8 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 96a4923080ae..8ce721eba383 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -504,6 +504,9 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
>         if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
>                 goto out;
>
> +       /* release in svc_process_bc */
> +       refcount_inc(&clp->cl_rpcclient->cl_count);
> +
>         tbl = &clp->cl_session->bc_slot_table;
>
>         /* Set up res before grabbing the spinlock */
> diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
> index 321af81c456e..40a43d615b82 100644
> --- a/fs/nfs/callback_xdr.c
> +++ b/fs/nfs/callback_xdr.c
> @@ -967,6 +967,9 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
>                 nops--;
>         }
>
> +       if (svc_is_backchannel(rqstp) && cps.clp)
> +               rqstp->bc_rpc_clnt = cps.clp->cl_rpcclient;
> +
>         *hdr_res.status = status;
>         *hdr_res.nops = htonl(nops);
>         nfs4_cb_free_slot(&cps);
> diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
> index db30a159f9d5..f22bf915dcf6 100644
> --- a/include/linux/sunrpc/bc_xprt.h
> +++ b/include/linux/sunrpc/bc_xprt.h
> @@ -20,7 +20,8 @@
>  #ifdef CONFIG_SUNRPC_BACKCHANNEL
>  struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid);
>  void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
> -void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task);
> +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
> +               const struct rpc_timeout *to);
>  void xprt_free_bc_request(struct rpc_rqst *req);
>  int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
>  void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index 8ada7dc802d3..786c1d2e9d6a 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -205,7 +205,8 @@ struct rpc_wait_queue {
>   */
>  struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
>  struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
> -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
> +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
> +               struct rpc_clnt *clnt);
>  void           rpc_put_task(struct rpc_task *);
>  void           rpc_put_task_async(struct rpc_task *);
>  bool           rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index b10f987509cc..1c727a422a65 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -250,6 +250,7 @@ struct svc_rqst {
>         struct net              *rq_bc_net;     /* pointer to backchannel's
>                                                  * net namespace
>                                                  */
> +       struct rpc_clnt         *bc_rpc_clnt;   /* v4.1 backchannel RPC client */
>         void **                 rq_lease_breaker; /* The v4 client breaking a lease */
>         unsigned int            rq_status_counter; /* RPC processing counter */
>  };
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index daa9582ec861..1946c2b39de3 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -1302,9 +1302,12 @@ static void call_bc_encode(struct rpc_task *task);
>   * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
>   * rpc_execute against it
>   * @req: RPC request
> + * @clnt: RPC client from request's cl_rpcclient
>   */
> -struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
> +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
> +               struct rpc_clnt *clnt)
>  {
> +       const struct rpc_timeout *timeout;
>         struct rpc_task *task;
>         struct rpc_task_setup task_setup_data = {
>                 .callback_ops = &rpc_default_ops,
> @@ -1322,7 +1325,8 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
>                 return task;
>         }
>
> -       xprt_init_bc_request(req, task);
> +       timeout = clnt ? clnt->cl_timeout : req->rq_xprt->timeout;
> +       xprt_init_bc_request(req, task, timeout);
>
>         task->tk_action = call_bc_encode;
>         atomic_inc(&task->tk_count);
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 3f2ea7a0496f..47685c05499a 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1603,7 +1603,11 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
>         }
>         /* Finally, send the reply synchronously */
>         memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
> -       task = rpc_run_bc_task(req);
> +       task = rpc_run_bc_task(req, rqstp->bc_rpc_clnt);
> +
> +       if (rqstp->bc_rpc_clnt)
> +               rpc_release_client(rqstp->bc_rpc_clnt);
> +
>         if (IS_ERR(task))
>                 return;
>
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 6cc9ffac962d..6f69975f3764 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -1986,7 +1986,8 @@ void xprt_release(struct rpc_task *task)
>
>  #ifdef CONFIG_SUNRPC_BACKCHANNEL
>  void
> -xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
> +xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
> +               const struct rpc_timeout *to)
>  {
>         struct xdr_buf *xbufp = &req->rq_snd_buf;
>
> @@ -2000,7 +2001,7 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
>         xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
>                 xbufp->tail[0].iov_len;
>
> -       xprt_init_majortimeo(task, req, req->rq_xprt->timeout);
> +       xprt_init_majortimeo(task, req, to);
>  }
>  #endif
>
> --
> 2.43.0
>
Trond Myklebust Jan. 3, 2024, 11 p.m. UTC | #2
On Wed, 2024-01-03 at 16:45 -0500, Anna Schumaker wrote:
> Hi Ben,
> 
> On Wed, Dec 13, 2023 at 2:49 PM Benjamin Coddington
> <bcodding@redhat.com> wrote:
> > 
> > For backchannel requests that lookup the appropriate nfs_client,
> > use the
> > state-management rpc_clnt's rpc_timeout parameters for the
> > backchannel's
> > response.  When the nfs_client cannot be found, fall back to using
> > the
> > xprt's default timeout parameters.
> 
> I'm seeing a use-after-free after applying this patch when using pNFS
> and session trunking. Any idea what's going on? Here is the stack
> trace I'm seeing:

I'm going to guess that this is happening because nothing is clearing
rqstp->bc_rpc_clnt after a call to svc_process_bc(). So if something
later calls CB_NULL, then the resulting svc_process_bc() will free an
extra reference.

> 
> [    9.935256] nfs4filelayout_init: NFSv4 File Layout Driver
> Registering...
> [   10.914810] NFS:  192.168.122.42: Session trunking succeeded for
> 192.168.122.44
> [   11.864302] ------------[ cut here ]------------
> [   11.864305] refcount_t: addition on 0; use-after-free.
> [   11.864320] WARNING: CPU: 0 PID: 742 at lib/refcount.c:25
> refcount_warn_saturate+0x82/0x120
> [   11.864333] Modules linked in: nfs_layout_nfsv41_files
> rpcsec_gss_krb5 nfsv4 nfs fscache netfs rpcrdma rdma_cm iw_cm ib_cm
> ib_cor
> e cfg80211 rfkill 8021q garp stp mrp llc intel_rapl_msr
> intel_rapl_common intel_uncore_frequency_common kvm_intel kvm
> snd_hda_codec_
> generic irqbypass crct10dif_pclmul crc32_pclmul polyval_clmulni
> snd_hda_intel polyval_generic snd_intel_dspcfg gf128mul snd_hda_code
> c ghash_clmulni_intel sha512_ssse3 snd_hwdep sha256_ssse3 sha1_ssse3
> vfat iTCO_wdt snd_hda_core aesni_intel fat joydev snd_pcm mouse
> dev crypto_simd intel_pmc_bxt cryptd snd_timer iTCO_vendor_support
> usbhid rapl snd psmouse pcspkr i2c_i801 soundcore lpc_ich i2c_smb
> us mac_hid nfsd nfs_acl auth_rpcgss lockd grace usbip_host usbip_core
> loop fuse sunrpc dm_mod nfnetlink qemu_fw_cfg ip_tables x_tabl
> es xfs libcrc32c crc32c_generic virtio_net virtio_gpu virtio_balloon
> net_failover virtio_blk failover virtio_dma_buf serio_raw virti
> o_console atkbd virtio_scsi libps2 virtio_rng vivaldi_fmap
> crc32c_intel i8042 intel_agp virtio_pci intel_gtt
> [   11.864383]  virtio_pci_legacy_dev xhci_pci serio xhci_pci_renesas
> virtio_pci_modern_dev
> [   11.864387] CPU: 0 PID: 742 Comm: umount.nfs Not tainted
> 6.7.0-rc8-g4f79a1eeea4b+ #41661
> 749ab84c1af8a7111125e804fccf9cd536b09ccc
> [   11.864390] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> BIOS unknown 2/2/2022
> [   11.864391] RIP: 0010:refcount_warn_saturate+0x82/0x120
> [   11.864393] Code: 89 e8 32 33 a3 ff 0f 0b c3 cc cc cc cc cc 80 3d
> 44 b4 52 01 00 75 c0 c6 05 3b b4 52 01 01 48 c7 c7 61 ad 7d 89
> e8 0e 33 a3 ff <0f> 0b c3 cc cc cc cc cc 80 3d 21 b4 52 01 00 75 9c
> c6
> 05 18 b4 52
> [   11.864394] RSP: 0018:ffffa677c0b97ca8 EFLAGS: 00010246
> [   11.864395] RAX: bd28f387bfec6500 RBX: ffff99e884324e00 RCX:
> 0000000000000027
> [   11.864397] RDX: ffffa677c0b97b70 RSI: 00000000ffffefff RDI:
> ffff99e8fbc21748
> [   11.864398] RBP: 0000000000000000 R08: 0000000000000fff R09:
> ffffffff89a577e0
> [   11.864399] R10: 0000000000002ffd R11: 0000000000000004 R12:
> ffff99e88c09a000
> [   11.864400] R13: 0000000000000000 R14: ffffa677c0b97cf0 R15:
> ffff99e884324e00
> [   11.864401] FS:  00007f0f757e0740(0000) GS:ffff99e8fbc00000(0000)
> knlGS:0000000000000000
> [   11.864402] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   11.864403] CR2: 000055b5ebbfdf70 CR3: 000000010c23e000 CR4:
> 0000000000750ef0
> [   11.864406] PKRU: 55555554
> [   11.864407] Call Trace:
> [   11.864409]  <TASK>
> [   11.864411]  ? __warn+0xc8/0x1c0
> [   11.864413]  ? refcount_warn_saturate+0x82/0x120
> [   11.864415]  ? report_bug+0x14e/0x1f0
> [   11.864418]  ? handle_bug+0x42/0x70
> [   11.864420]  ? exc_invalid_op+0x1a/0x50
> [   11.864421]  ? asm_exc_invalid_op+0x1a/0x20
> [   11.864424]  ? refcount_warn_saturate+0x82/0x120
> [   11.864425]  ? refcount_warn_saturate+0x82/0x120
> [   11.864427]  rpc_run_task+0x189/0x190 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.864465]  rpc_call_sync+0x76/0xd0 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.864494]  nfs4_proc_destroy_session+0x5f/0x110 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.864529]  nfs4_destroy_session+0x29/0x110 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.864561]  nfs41_shutdown_client+0xdc/0x100 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.864587]  nfs4_free_client+0x2e/0xb0 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.864612]  nfs_free_server+0x55/0xe0 [nfs
> 29da860307c99416b772706e6b567729d002ddce]
> [   11.864638]  deactivate_locked_super+0x38/0x100
> [   11.864641]  cleanup_mnt+0xfd/0x150
> [   11.864643]  task_work_run+0x86/0xb0
> [   11.864646]  exit_to_user_mode_loop+0xc8/0x110
> [   11.864648]  exit_to_user_mode_prepare+0x73/0xe0
> [   11.864650]  syscall_exit_to_user_mode+0x31/0x1b0
> [   11.864651]  do_syscall_64+0x77/0x100
> [   11.864654]  ? syscall_exit_to_user_mode+0x31/0x1b0
> [   11.864655]  ? do_syscall_64+0x77/0x100
> [   11.864657]  ? exc_page_fault+0x7a/0x1b0
> [   11.864658]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
> [   11.864660] RIP: 0033:0x7f0f759114fb
> [   11.864663] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3
> 0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6
> 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 01
> 68
> 0c 00 f7 d8
> [   11.864664] RSP: 002b:00007fff8bb7e358 EFLAGS: 00000246 ORIG_RAX:
> 00000000000000a6
> [   11.864665] RAX: 0000000000000000 RBX: 000055f3424bbfc0 RCX:
> 00007f0f759114fb
> [   11.864666] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
> 000055f3424bbfc0
> [   11.864667] RBP: 000055f3424bbf90 R08: 00000000ffffffff R09:
> 0000000000000020
> [   11.864668] R10: 00007f0f7580ddd0 R11: 0000000000000246 R12:
> 0000000000000000
> [   11.864669] R13: 00007fff8bb7e568 R14: 00007f0f75bc8000 R15:
> 000055f341da97d8
> [   11.864670]  </TASK>
> [   11.864671] ---[ end trace 0000000000000000 ]---
> [   11.864675] BUG: kernel NULL pointer dereference, address:
> 0000000000000018
> [   11.865152] #PF: supervisor read access in kernel mode
> [   11.865483] #PF: error_code(0x0000) - not-present page
> [   11.865811] PGD 0 P4D 0
> [   11.865983] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [   11.866275] CPU: 0 PID: 742 Comm: umount.nfs Tainted: G        W
>       6.7.0-rc8-g4f79a1eeea4b+ #41661 749ab84c1af8a7111125e80
> 4fccf9cd536b09ccc
> [   11.867085] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> BIOS unknown 2/2/2022
> [   11.867587] RIP: 0010:rpcauth_refreshcred+0x133/0x2b0 [sunrpc]
> [   11.867987] Code: 8b 70 50 48 c7 c7 40 15 a1 89 e8 08 6e 8f c7 48
> 89 44 24 18 48 c7 44 24 20 00 00 00 00 8b 83 d8 00 00 00 01 c0
> 83 e0 02 09 c5 <49> 8b 46 18 4c 8b 58 30 48 8d 74 24 18 4c 89 f7 89
> ea
> 41 ff d3 0f
> [   11.869142] RSP: 0018:ffffa677c0b97bc8 EFLAGS: 00010246
> [   11.869479] RAX: 0000000000000000 RBX: ffff99e884324e00 RCX:
> 0000000000000005
> [   11.869933] RDX: 000000000000d380 RSI: 000000000000464f RDI:
> ffff99e88d6ca0c0
> [   11.870383] RBP: 0000000000000000 R08: ffff99e88c09a018 R09:
> ffffffff89a577e0
> [   11.870839] R10: 0000000000002ffd R11: ffffffffc07dc330 R12:
> ffff99e88c09a600
> [   11.871293] R13: ffffffffc082d8d0 R14: 0000000000000000 R15:
> ffffffffc07dc330
> [   11.871730] FS:  00007f0f757e0740(0000) GS:ffff99e8fbc00000(0000)
> knlGS:0000000000000000
> [   11.872012] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   11.872221] CR2: 0000000000000018 CR3: 000000010c23e000 CR4:
> 0000000000750ef0
> [   11.872467] PKRU: 55555554
> [   11.872565] Call Trace:
> [   11.872656]  <TASK>
> [   11.872736]  ? __die_body+0x68/0xb0
> [   11.872862]  ? page_fault_oops+0x3a6/0x400
> [   11.873006]  ? exc_page_fault+0x7a/0x1b0
> [   11.873144]  ? asm_exc_page_fault+0x26/0x30
> [   11.873302]  ? __pfx_call_refresh+0x10/0x10 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.873620]  ? __pfx_call_refresh+0x10/0x10 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.873945]  ? rpcauth_refreshcred+0x133/0x2b0 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.874280]  ? rpcauth_refreshcred+0x118/0x2b0 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.874596]  ? _raw_spin_unlock+0xe/0x30
> [   11.874734]  ? __pfx_call_refresh+0x10/0x10 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.875042]  ? __pfx_rpc_exit_task+0x10/0x10 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.875352]  __rpc_execute+0xfe/0x4b0 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.875642]  rpc_execute+0xa8/0x150 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.875926]  rpc_run_task+0x165/0x190 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.876236]  rpc_call_sync+0x76/0xd0 [sunrpc
> 6793b05ff35aa080c42f3ac953d266676d3f5305]
> [   11.876525]  nfs4_proc_destroy_session+0x5f/0x110 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.876853]  nfs4_destroy_session+0x29/0x110 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.877164]  nfs41_shutdown_client+0xdc/0x100 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.877493]  nfs4_free_client+0x2e/0xb0 [nfsv4
> 40e2e5b44d7c7f6d8dd1ddb28617f0073ea88879]
> [   11.877790]  nfs_free_server+0x55/0xe0 [nfs
> 29da860307c99416b772706e6b567729d002ddce]
> [   11.878200]  deactivate_locked_super+0x38/0x100
> [   11.878370]  cleanup_mnt+0xfd/0x150
> [   11.878496]  task_work_run+0x86/0xb0
> [   11.878629]  exit_to_user_mode_loop+0xc8/0x110
> [   11.878785]  exit_to_user_mode_prepare+0x73/0xe0
> [   11.879121]  syscall_exit_to_user_mode+0x31/0x1b0
> [   11.879451]  do_syscall_64+0x77/0x100
> [   11.879746]  ? syscall_exit_to_user_mode+0x31/0x1b0
> [   11.880076]  ? do_syscall_64+0x77/0x100
> [   11.880370]  ? exc_page_fault+0x7a/0x1b0
> [   11.880683]  entry_SYSCALL_64_after_hwframe+0x6e/0x76
> [   11.881031] RIP: 0033:0x7f0f759114fb
> [   11.881321] Code: c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 f3
> 0f 1e fa 31 f6 e9 05 00 00 00 0f 1f 44 00 00 f3 0f 1e fa b8 a6
> 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 01
> 68
> 0c 00 f7 d8
> [   11.882267] RSP: 002b:00007fff8bb7e358 EFLAGS: 00000246 ORIG_RAX:
> 00000000000000a6
> [   11.882691] RAX: 0000000000000000 RBX: 000055f3424bbfc0 RCX:
> 00007f0f759114fb
> [   11.883121] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
> 000055f3424bbfc0
> [   11.883550] RBP: 000055f3424bbf90 R08: 00000000ffffffff R09:
> 0000000000000020
> [   11.883987] R10: 00007f0f7580ddd0 R11: 0000000000000246 R12:
> 0000000000000000
> [   11.884429] R13: 00007fff8bb7e568 R14: 00007f0f75bc8000 R15:
> 000055f341da97d8
> [   11.884873]  </TASK>
> [   11.885132] Modules linked in: nfs_layout_nfsv41_files
> rpcsec_gss_krb5 nfsv4 nfs fscache netfs rpcrdma rdma_cm iw_cm ib_cm
> ib_cor
> e cfg80211 rfkill 8021q garp stp mrp llc intel_rapl_msr
> intel_rapl_common intel_uncore_frequency_common kvm_intel kvm
> snd_hda_codec_
> generic irqbypass crct10dif_pclmul crc32_pclmul polyval_clmulni
> snd_hda_intel polyval_generic snd_intel_dspcfg gf128mul snd_hda_code
> c ghash_clmulni_intel sha512_ssse3 snd_hwdep sha256_ssse3 sha1_ssse3
> vfat iTCO_wdt snd_hda_core aesni_intel fat joydev snd_pcm mouse
> dev crypto_simd intel_pmc_bxt cryptd snd_timer iTCO_vendor_support
> usbhid rapl snd psmouse pcspkr i2c_i801 soundcore lpc_ich i2c_smb
> us mac_hid nfsd nfs_acl auth_rpcgss lockd grace usbip_host usbip_core
> loop fuse sunrpc dm_mod nfnetlink qemu_fw_cfg ip_tables x_tabl
> es xfs libcrc32c crc32c_generic virtio_net virtio_gpu virtio_balloon
> net_failover virtio_blk failover virtio_dma_buf serio_raw virti
> o_console atkbd virtio_scsi libps2 virtio_rng vivaldi_fmap
> crc32c_intel i8042 intel_agp virtio_pci intel_gtt
> [   11.885172]  virtio_pci_legacy_dev xhci_pci serio xhci_pci_renesas
> virtio_pci_modern_dev
> [   11.890142] CR2: 0000000000000018
> [   11.890468] ---[ end trace 0000000000000000 ]---
> 
> I hit this while testing against ontap, if that helps.
> 
> Thanks,
> Anna
> 
> > 
> > Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> > ---
> >  fs/nfs/callback_proc.c         | 3 +++
> >  fs/nfs/callback_xdr.c          | 3 +++
> >  include/linux/sunrpc/bc_xprt.h | 3 ++-
> >  include/linux/sunrpc/sched.h   | 3 ++-
> >  include/linux/sunrpc/svc.h     | 1 +
> >  net/sunrpc/clnt.c              | 8 ++++++--
> >  net/sunrpc/svc.c               | 6 +++++-
> >  net/sunrpc/xprt.c              | 5 +++--
> >  8 files changed, 25 insertions(+), 7 deletions(-)
> > 
> > diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> > index 96a4923080ae..8ce721eba383 100644
> > --- a/fs/nfs/callback_proc.c
> > +++ b/fs/nfs/callback_proc.c
> > @@ -504,6 +504,9 @@ __be32 nfs4_callback_sequence(void *argp, void
> > *resp,
> >         if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
> >                 goto out;
> > 
> > +       /* release in svc_process_bc */
> > +       refcount_inc(&clp->cl_rpcclient->cl_count);
> > +
> >         tbl = &clp->cl_session->bc_slot_table;
> > 
> >         /* Set up res before grabbing the spinlock */
> > diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
> > index 321af81c456e..40a43d615b82 100644
> > --- a/fs/nfs/callback_xdr.c
> > +++ b/fs/nfs/callback_xdr.c
> > @@ -967,6 +967,9 @@ static __be32 nfs4_callback_compound(struct
> > svc_rqst *rqstp)
> >                 nops--;
> >         }
> > 
> > +       if (svc_is_backchannel(rqstp) && cps.clp)
> > +               rqstp->bc_rpc_clnt = cps.clp->cl_rpcclient;


You can re-create the clnt->cl_timeout in svc_process_bc() by just
storing the values of to_initval and to_retrans here. Why store a
reference to an entire rpc client structure that you don't need?
>
Benjamin Coddington Jan. 4, 2024, 11:55 a.m. UTC | #3
On 3 Jan 2024, at 18:00, Trond Myklebust wrote:

> On Wed, 2024-01-03 at 16:45 -0500, Anna Schumaker wrote:
>> Hi Ben,
>>
>> On Wed, Dec 13, 2023 at 2:49 PM Benjamin Coddington
>> <bcodding@redhat.com> wrote:
>>>
>>> For backchannel requests that lookup the appropriate nfs_client,
>>> use the
>>> state-management rpc_clnt's rpc_timeout parameters for the
>>> backchannel's
>>> response.  When the nfs_client cannot be found, fall back to using
>>> the
>>> xprt's default timeout parameters.
>>
>> I'm seeing a use-after-free after applying this patch when using pNFS
>> and session trunking. Any idea what's going on? Here is the stack
>> trace I'm seeing:
>
> I'm going to guess that this is happening because nothing is clearing
> rqstp->bc_rpc_clnt after a call to svc_process_bc(). So if something
> later calls CB_NULL, then the resulting svc_process_bc() will free an
> extra reference.

Doh!

>>
>> I hit this while testing against ontap, if that helps.
>>
>> Thanks,
>> Anna

Thank you for the test!!

>>> --- a/fs/nfs/callback_xdr.c
>>> +++ b/fs/nfs/callback_xdr.c
>>> @@ -967,6 +967,9 @@ static __be32 nfs4_callback_compound(struct
>>> svc_rqst *rqstp)
>>>                 nops--;
>>>         }
>>>
>>> +       if (svc_is_backchannel(rqstp) && cps.clp)
>>> +               rqstp->bc_rpc_clnt = cps.clp->cl_rpcclient;
>
>
> You can re-create the clnt->cl_timeout in svc_process_bc() by just
> storing the values of to_initval and to_retrans here. Why store a
> reference to an entire rpc client structure that you don't need?

Hmm, I think I started by thinking we could simply set tk_client, but then
didn't end up with that for other reasons and just kept passing the single
pointer.

I will send a v4 with your suggestion.

Ben
Benjamin Coddington Jan. 4, 2024, 12:13 p.m. UTC | #4
On 4 Jan 2024, at 6:55, Benjamin Coddington wrote:

> On 3 Jan 2024, at 18:00, Trond Myklebust wrote:
>
>> On Wed, 2024-01-03 at 16:45 -0500, Anna Schumaker wrote:
>>> Hi Ben,
>>>
>>> On Wed, Dec 13, 2023 at 2:49 PM Benjamin Coddington
>>> <bcodding@redhat.com> wrote:
>>>>
>>>> For backchannel requests that lookup the appropriate nfs_client,
>>>> use the
>>>> state-management rpc_clnt's rpc_timeout parameters for the
>>>> backchannel's
>>>> response.  When the nfs_client cannot be found, fall back to using
>>>> the
>>>> xprt's default timeout parameters.
>>>
>>> I'm seeing a use-after-free after applying this patch when using pNFS
>>> and session trunking. Any idea what's going on? Here is the stack
>>> trace I'm seeing:
>>
>> I'm going to guess that this is happening because nothing is clearing
>> rqstp->bc_rpc_clnt after a call to svc_process_bc(). So if something
>> later calls CB_NULL, then the resulting svc_process_bc() will free an
>> extra reference.
>
> Doh!
>
>>>
>>> I hit this while testing against ontap, if that helps.
>>>
>>> Thanks,
>>> Anna
>
> Thank you for the test!!
>
>>>> --- a/fs/nfs/callback_xdr.c
>>>> +++ b/fs/nfs/callback_xdr.c
>>>> @@ -967,6 +967,9 @@ static __be32 nfs4_callback_compound(struct
>>>> svc_rqst *rqstp)
>>>>                 nops--;
>>>>         }
>>>>
>>>> +       if (svc_is_backchannel(rqstp) && cps.clp)
>>>> +               rqstp->bc_rpc_clnt = cps.clp->cl_rpcclient;
>>
>>
>> You can re-create the clnt->cl_timeout in svc_process_bc() by just
>> storing the values of to_initval and to_retrans here. Why store a
>> reference to an entire rpc client structure that you don't need?
>
> Hmm, I think I started by thinking we could simply set tk_client, but then
> didn't end up with that for other reasons and just kept passing the single
> pointer.

.. and the client being NULL was the signal to fallback to using the xprt
timeouts.  That's lost with to_initval and to_retries because they can be
set to zero deliberately.  I'm not immediately seeing an easy way to carry
all that info across on the svc_rqst without three additional fields or
another struct.  Either we drop the part where we fallback to the xprt
defaults, or things get a bit uglier.

Ben
Benjamin Coddington Jan. 4, 2024, 1:23 p.m. UTC | #5
On 4 Jan 2024, at 7:13, Benjamin Coddington wrote:

> On 4 Jan 2024, at 6:55, Benjamin Coddington wrote:
>
>> On 3 Jan 2024, at 18:00, Trond Myklebust wrote:
>>
>>> On Wed, 2024-01-03 at 16:45 -0500, Anna Schumaker wrote:
>>>> Hi Ben,
>>>>
>>>> On Wed, Dec 13, 2023 at 2:49 PM Benjamin Coddington
>>>> <bcodding@redhat.com> wrote:
>>>>>
>>>>> For backchannel requests that lookup the appropriate nfs_client,
>>>>> use the
>>>>> state-management rpc_clnt's rpc_timeout parameters for the
>>>>> backchannel's
>>>>> response.  When the nfs_client cannot be found, fall back to using
>>>>> the
>>>>> xprt's default timeout parameters.
>>>>
>>>> I'm seeing a use-after-free after applying this patch when using pNFS
>>>> and session trunking. Any idea what's going on? Here is the stack
>>>> trace I'm seeing:
>>>
>>> I'm going to guess that this is happening because nothing is clearing
>>> rqstp->bc_rpc_clnt after a call to svc_process_bc(). So if something
>>> later calls CB_NULL, then the resulting svc_process_bc() will free an
>>> extra reference.
>>
>> Doh!
>>
>>>>
>>>> I hit this while testing against ontap, if that helps.
>>>>
>>>> Thanks,
>>>> Anna
>>
>> Thank you for the test!!
>>
>>>>> --- a/fs/nfs/callback_xdr.c
>>>>> +++ b/fs/nfs/callback_xdr.c
>>>>> @@ -967,6 +967,9 @@ static __be32 nfs4_callback_compound(struct
>>>>> svc_rqst *rqstp)
>>>>>                 nops--;
>>>>>         }
>>>>>
>>>>> +       if (svc_is_backchannel(rqstp) && cps.clp)
>>>>> +               rqstp->bc_rpc_clnt = cps.clp->cl_rpcclient;
>>>
>>>
>>> You can re-create the clnt->cl_timeout in svc_process_bc() by just
>>> storing the values of to_initval and to_retrans here. Why store a
>>> reference to an entire rpc client structure that you don't need?
>>
>> Hmm, I think I started by thinking we could simply set tk_client, but then
>> didn't end up with that for other reasons and just kept passing the single
>> pointer.
>
> .. and the client being NULL was the signal to fallback to using the xprt
> timeouts.  That's lost with to_initval and to_retries because they can be
> set to zero deliberately.

I'm wrong about this ^^.  The mount option timeo must be > 0, so we can
check if to_initval is zero to see if we should fall back to the xprt
default.

Ben
diff mbox series

Patch

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 96a4923080ae..8ce721eba383 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -504,6 +504,9 @@  __be32 nfs4_callback_sequence(void *argp, void *resp,
 	if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
 		goto out;
 
+	/* release in svc_process_bc */
+	refcount_inc(&clp->cl_rpcclient->cl_count);
+
 	tbl = &clp->cl_session->bc_slot_table;
 
 	/* Set up res before grabbing the spinlock */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 321af81c456e..40a43d615b82 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -967,6 +967,9 @@  static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 		nops--;
 	}
 
+	if (svc_is_backchannel(rqstp) && cps.clp)
+		rqstp->bc_rpc_clnt = cps.clp->cl_rpcclient;
+
 	*hdr_res.status = status;
 	*hdr_res.nops = htonl(nops);
 	nfs4_cb_free_slot(&cps);
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index db30a159f9d5..f22bf915dcf6 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -20,7 +20,8 @@ 
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid);
 void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
-void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task);
+void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
+		const struct rpc_timeout *to);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8ada7dc802d3..786c1d2e9d6a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -205,7 +205,8 @@  struct rpc_wait_queue {
  */
 struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
 struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req);
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+		struct rpc_clnt *clnt);
 void		rpc_put_task(struct rpc_task *);
 void		rpc_put_task_async(struct rpc_task *);
 bool		rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index b10f987509cc..1c727a422a65 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -250,6 +250,7 @@  struct svc_rqst {
 	struct net		*rq_bc_net;	/* pointer to backchannel's
 						 * net namespace
 						 */
+	struct rpc_clnt		*bc_rpc_clnt;	/* v4.1 backchannel RPC client */
 	void **			rq_lease_breaker; /* The v4 client breaking a lease */
 	unsigned int		rq_status_counter; /* RPC processing counter */
 };
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index daa9582ec861..1946c2b39de3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1302,9 +1302,12 @@  static void call_bc_encode(struct rpc_task *task);
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
  * @req: RPC request
+ * @clnt: RPC client from request's cl_rpcclient
  */
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+		struct rpc_clnt *clnt)
 {
+	const struct rpc_timeout *timeout;
 	struct rpc_task *task;
 	struct rpc_task_setup task_setup_data = {
 		.callback_ops = &rpc_default_ops,
@@ -1322,7 +1325,8 @@  struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 		return task;
 	}
 
-	xprt_init_bc_request(req, task);
+	timeout = clnt ? clnt->cl_timeout : req->rq_xprt->timeout;
+	xprt_init_bc_request(req, task, timeout);
 
 	task->tk_action = call_bc_encode;
 	atomic_inc(&task->tk_count);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 3f2ea7a0496f..47685c05499a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1603,7 +1603,11 @@  void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
 	}
 	/* Finally, send the reply synchronously */
 	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
-	task = rpc_run_bc_task(req);
+	task = rpc_run_bc_task(req, rqstp->bc_rpc_clnt);
+
+	if (rqstp->bc_rpc_clnt)
+		rpc_release_client(rqstp->bc_rpc_clnt);
+
 	if (IS_ERR(task))
 		return;
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6cc9ffac962d..6f69975f3764 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1986,7 +1986,8 @@  void xprt_release(struct rpc_task *task)
 
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 void
-xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
+		const struct rpc_timeout *to)
 {
 	struct xdr_buf *xbufp = &req->rq_snd_buf;
 
@@ -2000,7 +2001,7 @@  xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
 	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
 		xbufp->tail[0].iov_len;
 
-	xprt_init_majortimeo(task, req, req->rq_xprt->timeout);
+	xprt_init_majortimeo(task, req, to);
 }
 #endif