diff mbox

NFSv4.1: Hold reference to layout hdr in layoutget

Message ID 1361845653-63782-1-git-send-email-dros@netapp.com (mailing list archive)
State New, archived
Headers show

Commit Message

Weston Andros Adamson Feb. 26, 2013, 2:27 a.m. UTC
This fixes an oops where a LAYOUTGET is in still in the rpciod queue,
but the requesting processes has been killed.  Without this, killing
the process does the final pnfs_put_layout_hdr() and sets NFS_I(inode)->layout
to NULL while the LAYOUTGET rpc task still references it.

Example oops:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
IP: [<ffffffffa01bd586>] pnfs_choose_layoutget_stateid+0x37/0xef [nfsv4]
PGD 7365b067 PUD 7365d067 PMD 0
Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
Modules linked in: nfs_layout_nfsv41_files nfsv4 auth_rpcgss nfs lockd sunrpc ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle ip6table_filter ip6_tables ppdev e1000 i2c_piix4 i2c_core shpchp parport_pc parport crc32c_intel aesni_intel xts aes_x86_64 lrw gf128mul ablk_helper cryptd mptspi scsi_transport_spi mptscsih mptbase floppy autofs4
CPU 0
Pid: 27, comm: kworker/0:1 Not tainted 3.8.0-dros_cthon2013+ #4 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform
RIP: 0010:[<ffffffffa01bd586>]  [<ffffffffa01bd586>] pnfs_choose_layoutget_stateid+0x37/0xef [nfsv4]
RSP: 0018:ffff88007b0c1c88  EFLAGS: 00010246
RAX: ffff88006ed36678 RBX: 0000000000000000 RCX: 0000000ea877e3bc
RDX: ffff88007a729da8 RSI: 0000000000000000 RDI: ffff88007a72b958
RBP: ffff88007b0c1ca8 R08: 0000000000000002 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88007a72b958
R13: ffff88007a729da8 R14: 0000000000000000 R15: ffffffffa011077e
FS:  0000000000000000(0000) GS:ffff88007f600000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000080 CR3: 00000000735f8000 CR4: 00000000001407f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process kworker/0:1 (pid: 27, threadinfo ffff88007b0c0000, task ffff88007c2fa0c0)
Stack:
 ffff88006fc05388 ffff88007a72b908 ffff88007b240900 ffff88006fc05388
 ffff88007b0c1cd8 ffffffffa01a2170 ffff88007b240900 ffff88007b240900
 ffff88007b240970 ffffffffa011077e ffff88007b0c1ce8 ffffffffa0110791
Call Trace:
 [<ffffffffa01a2170>] nfs4_layoutget_prepare+0x7b/0x92 [nfsv4]
 [<ffffffffa011077e>] ? __rpc_atrun+0x15/0x15 [sunrpc]
 [<ffffffffa0110791>] rpc_prepare_task+0x13/0x15 [sunrpc]

Reported-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/nfs4proc.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

Trond Myklebust Feb. 26, 2013, 2:40 a.m. UTC | #1
On Mon, 2013-02-25 at 21:27 -0500, Weston Andros Adamson wrote:
> This fixes an oops where a LAYOUTGET is in still in the rpciod queue,
> but the requesting processes has been killed.  Without this, killing
> the process does the final pnfs_put_layout_hdr() and sets NFS_I(inode)->layout
> to NULL while the LAYOUTGET rpc task still references it.
> 
> Example oops:
> 
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
> IP: [<ffffffffa01bd586>] pnfs_choose_layoutget_stateid+0x37/0xef [nfsv4]
> PGD 7365b067 PUD 7365d067 PMD 0
> Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
> Modules linked in: nfs_layout_nfsv41_files nfsv4 auth_rpcgss nfs lockd sunrpc ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle ip6table_filter ip6_tables ppdev e1000 i2c_piix4 i2c_core shpchp parport_pc parport crc32c_intel aesni_intel xts aes_x86_64 lrw gf128mul ablk_helper cryptd mptspi scsi_transport_spi mptscsih mptbase floppy autofs4
> CPU 0
> Pid: 27, comm: kworker/0:1 Not tainted 3.8.0-dros_cthon2013+ #4 VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform
> RIP: 0010:[<ffffffffa01bd586>]  [<ffffffffa01bd586>] pnfs_choose_layoutget_stateid+0x37/0xef [nfsv4]
> RSP: 0018:ffff88007b0c1c88  EFLAGS: 00010246
> RAX: ffff88006ed36678 RBX: 0000000000000000 RCX: 0000000ea877e3bc
> RDX: ffff88007a729da8 RSI: 0000000000000000 RDI: ffff88007a72b958
> RBP: ffff88007b0c1ca8 R08: 0000000000000002 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000000 R12: ffff88007a72b958
> R13: ffff88007a729da8 R14: 0000000000000000 R15: ffffffffa011077e
> FS:  0000000000000000(0000) GS:ffff88007f600000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000080 CR3: 00000000735f8000 CR4: 00000000001407f0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process kworker/0:1 (pid: 27, threadinfo ffff88007b0c0000, task ffff88007c2fa0c0)
> Stack:
>  ffff88006fc05388 ffff88007a72b908 ffff88007b240900 ffff88006fc05388
>  ffff88007b0c1cd8 ffffffffa01a2170 ffff88007b240900 ffff88007b240900
>  ffff88007b240970 ffffffffa011077e ffff88007b0c1ce8 ffffffffa0110791
> Call Trace:
>  [<ffffffffa01a2170>] nfs4_layoutget_prepare+0x7b/0x92 [nfsv4]
>  [<ffffffffa011077e>] ? __rpc_atrun+0x15/0x15 [sunrpc]
>  [<ffffffffa0110791>] rpc_prepare_task+0x13/0x15 [sunrpc]
> 
> Reported-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
> Signed-off-by: Weston Andros Adamson <dros@netapp.com>
> Cc: stable@kernel.org
> ---
>  fs/nfs/nfs4proc.c | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index eae83bf..a2de760 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6129,12 +6129,14 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
>  static void nfs4_layoutget_release(void *calldata)
>  {
>  	struct nfs4_layoutget *lgp = calldata;
> -	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
> +	struct inode *inode = lgp->args.inode;
> +	struct nfs_server *server = NFS_SERVER(inode);
>  	size_t max_pages = max_response_pages(server);
>  
>  	dprintk("--> %s\n", __func__);
>  	nfs4_free_pages(lgp->args.layout.pages, max_pages);
>  	put_nfs_open_context(lgp->args.ctx);
> +	pnfs_put_layout_hdr(NFS_I(inode)->layout);
>  	kfree(calldata);
>  	dprintk("<-- %s\n", __func__);
>  }
> @@ -6148,7 +6150,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
>  struct pnfs_layout_segment *
>  nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
>  {
> -	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
> +	struct inode *inode = lgp->args.inode;
> +	struct nfs_server *server = NFS_SERVER(inode);
>  	size_t max_pages = max_response_pages(server);
>  	struct rpc_task *task;
>  	struct rpc_message msg = {
> @@ -6178,6 +6181,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
>  	lgp->res.layoutp = &lgp->args.layout;
>  	lgp->res.seq_res.sr_slot = NULL;
>  	nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
> +
> +	spin_lock(&inode->i_lock);
> +	/* nfs4_layoutget_release calls pnfs_put_layout_hdr */
> +	pnfs_get_layout_hdr(NFS_I(inode)->layout);
> +	spin_unlock(&inode->i_lock);
> +
>  	task = rpc_run_task(&task_setup_data);
>  	if (IS_ERR(task))
>  		return ERR_CAST(task);

Thanks! Added to the bugfixes and cthon2013 branch...
diff mbox

Patch

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index eae83bf..a2de760 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6129,12 +6129,14 @@  static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
 static void nfs4_layoutget_release(void *calldata)
 {
 	struct nfs4_layoutget *lgp = calldata;
-	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+	struct inode *inode = lgp->args.inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	size_t max_pages = max_response_pages(server);
 
 	dprintk("--> %s\n", __func__);
 	nfs4_free_pages(lgp->args.layout.pages, max_pages);
 	put_nfs_open_context(lgp->args.ctx);
+	pnfs_put_layout_hdr(NFS_I(inode)->layout);
 	kfree(calldata);
 	dprintk("<-- %s\n", __func__);
 }
@@ -6148,7 +6150,8 @@  static const struct rpc_call_ops nfs4_layoutget_call_ops = {
 struct pnfs_layout_segment *
 nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 {
-	struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+	struct inode *inode = lgp->args.inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	size_t max_pages = max_response_pages(server);
 	struct rpc_task *task;
 	struct rpc_message msg = {
@@ -6178,6 +6181,12 @@  nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 	lgp->res.layoutp = &lgp->args.layout;
 	lgp->res.seq_res.sr_slot = NULL;
 	nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
+
+	spin_lock(&inode->i_lock);
+	/* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+	pnfs_get_layout_hdr(NFS_I(inode)->layout);
+	spin_unlock(&inode->i_lock);
+
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return ERR_CAST(task);