Message ID | 1404143423-24381-4-git-send-email-jlayton@primarydata.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jun 30, 2014 at 11:48:32AM -0400, Jeff Layton wrote: > There's a fair chance we won't ever need this work struct, so we might > as well delay initializing it until just before we're going to use it. > In a later patch, we'll need to intialize the work with a different > function for delegation callbacks. By delaying this, we avoid having > to do it twice. When I run ./nfs4.1/testserver.py f19:/exports/xfs/FUBAR --maketree --rundeps COMP1 I get crashes like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [<ffffffff810890b1>] process_one_work+0x31/0x500 PGD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc CPU: 2 PID: 30 Comm: kworker/u8:1 Not tainted 3.16.0-rc2-00023-g30c1d16 #3051 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88003d9cc810 ti: ffff88003d9d0000 task.ti: ffff88003d9d0000 RIP: 0010:[<ffffffff810890b1>] [<ffffffff810890b1>] process_one_work+0x31/0x500 RSP: 0018:ffff88003d9d3d68 EFLAGS: 00010046 RAX: 0000000000000100 RBX: ffff88003d9caf80 RCX: dead000000200200 RDX: 0000000000000100 RSI: ffff880031429b38 RDI: ffff88003d9caf80 RBP: ffff88003d9d3dd8 R08: ffff88003dcb7800 R09: 0000000001c40000 R10: 0000000000000000 R11: 000000000006b5b0 R12: ffff88003dcb7800 R13: 0000000000000000 R14: ffff880031429b38 R15: ffff88003d9caf80 FS: 0000000000000000(0000) GS:ffff88003fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000138 CR3: 0000000027744000 CR4: 00000000000006e0 Stack: 000000003d9d3d88 ffff88003dcb7818 ffff88003d9cafb0 ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003d9cafb0 ffff88003d9d3dd8 ffff88003dcb7800 ffff88003dcb7848 ffff88003d9cafb0 Call Trace: [<ffffffff81089b9b>] worker_thread+0x11b/0x4f0 [<ffffffff81089a80>] ? init_pwq+0x190/0x190 [<ffffffff81090ac4>] kthread+0xe4/0x100 [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 [<ffffffff81a4ca2c>] ret_from_fork+0x7c/0xb0 [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 Code: 48 89 e5 41 57 41 56 49 89 f6 41 55 45 31 ed 41 54 53 48 89 fb 48 83 ec 48 48 8b 06 4c 8b 67 48 48 89 c2 30 d2 a8 04 4c 0f 45 ea <49> 8b 45 08 48 c7 45 b8 00 00 00 00 48 c7 45 c0 00 00 00 00 8b RIP [<ffffffff810890b1>] process_one_work+0x31/0x500 RSP <ffff88003d9d3d68> CR2: 0000000000000008 and I'm suspicious of this: > @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) > > cb->cb_ops = &nfsd4_cb_probe_ops; > > + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > + > run_nfsd4_cb(cb); > } This is called all over the place, possibly multiple times on the same client, so we're calling INIT_WORK on something that may already be in use--I doubt that's safe. I'm backing out this patch for now. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 8 Jul 2014 17:11:34 -0400 "J. Bruce Fields" <bfields@fieldses.org> wrote: > On Mon, Jun 30, 2014 at 11:48:32AM -0400, Jeff Layton wrote: > > There's a fair chance we won't ever need this work struct, so we might > > as well delay initializing it until just before we're going to use it. > > In a later patch, we'll need to intialize the work with a different > > function for delegation callbacks. By delaying this, we avoid having > > to do it twice. > > When I run > > ./nfs4.1/testserver.py f19:/exports/xfs/FUBAR --maketree --rundeps COMP1 > > I get crashes like: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > IP: [<ffffffff810890b1>] process_one_work+0x31/0x500 > PGD 0 > Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc > CPU: 2 PID: 30 Comm: kworker/u8:1 Not tainted 3.16.0-rc2-00023-g30c1d16 #3051 > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > task: ffff88003d9cc810 ti: ffff88003d9d0000 task.ti: ffff88003d9d0000 > RIP: 0010:[<ffffffff810890b1>] [<ffffffff810890b1>] process_one_work+0x31/0x500 > RSP: 0018:ffff88003d9d3d68 EFLAGS: 00010046 > RAX: 0000000000000100 RBX: ffff88003d9caf80 RCX: dead000000200200 > RDX: 0000000000000100 RSI: ffff880031429b38 RDI: ffff88003d9caf80 > RBP: ffff88003d9d3dd8 R08: ffff88003dcb7800 R09: 0000000001c40000 > R10: 0000000000000000 R11: 000000000006b5b0 R12: ffff88003dcb7800 > R13: 0000000000000000 R14: ffff880031429b38 R15: ffff88003d9caf80 > FS: 0000000000000000(0000) GS:ffff88003fb00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 0000000000000138 CR3: 0000000027744000 CR4: 00000000000006e0 > Stack: > 000000003d9d3d88 ffff88003dcb7818 ffff88003d9cafb0 ffff88003dcb7800 > ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003d9cafb0 > ffff88003d9d3dd8 ffff88003dcb7800 ffff88003dcb7848 ffff88003d9cafb0 > Call Trace: > [<ffffffff81089b9b>] worker_thread+0x11b/0x4f0 > [<ffffffff81089a80>] ? init_pwq+0x190/0x190 > [<ffffffff81090ac4>] kthread+0xe4/0x100 > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > [<ffffffff81a4ca2c>] ret_from_fork+0x7c/0xb0 > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > Code: 48 89 e5 41 57 41 56 49 89 f6 41 55 45 31 ed 41 54 53 48 89 fb 48 83 ec 48 48 8b 06 4c 8b 67 48 48 89 c2 30 d2 a8 04 4c 0f 45 ea <49> 8b 45 08 48 c7 45 b8 00 00 00 00 48 c7 45 c0 00 00 00 00 8b > RIP [<ffffffff810890b1>] process_one_work+0x31/0x500 > RSP <ffff88003d9d3d68> > CR2: 0000000000000008 > > and I'm suspicious of this: > > > @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) > > > > cb->cb_ops = &nfsd4_cb_probe_ops; > > > > + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > > + > > run_nfsd4_cb(cb); > > } > > This is called all over the place, possibly multiple times on the same > client, so we're calling INIT_WORK on something that may already be in > use--I doubt that's safe. > Doh! That makes total sense, and yes it would almost certainly wreak havoc to reinitialize a workqueue job that has already been queued. > I'm backing out this patch for now. > Yes, please do. Christoph, I think we'll just have to live with the double initialization here. Cheers,
On Tue, Jul 08, 2014 at 05:11:34PM -0400, J. Bruce Fields wrote: > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > IP: [<ffffffff810890b1>] process_one_work+0x31/0x500 > PGD 0 > Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc > CPU: 2 PID: 30 Comm: kworker/u8:1 Not tainted 3.16.0-rc2-00023-g30c1d16 #3051 > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > task: ffff88003d9cc810 ti: ffff88003d9d0000 task.ti: ffff88003d9d0000 > RIP: 0010:[<ffffffff810890b1>] [<ffffffff810890b1>] process_one_work+0x31/0x500 > RSP: 0018:ffff88003d9d3d68 EFLAGS: 00010046 > RAX: 0000000000000100 RBX: ffff88003d9caf80 RCX: dead000000200200 > RDX: 0000000000000100 RSI: ffff880031429b38 RDI: ffff88003d9caf80 > RBP: ffff88003d9d3dd8 R08: ffff88003dcb7800 R09: 0000000001c40000 > R10: 0000000000000000 R11: 000000000006b5b0 R12: ffff88003dcb7800 > R13: 0000000000000000 R14: ffff880031429b38 R15: ffff88003d9caf80 > FS: 0000000000000000(0000) GS:ffff88003fb00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 0000000000000138 CR3: 0000000027744000 CR4: 00000000000006e0 > Stack: > 000000003d9d3d88 ffff88003dcb7818 ffff88003d9cafb0 ffff88003dcb7800 > ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003d9cafb0 > ffff88003d9d3dd8 ffff88003dcb7800 ffff88003dcb7848 ffff88003d9cafb0 > Call Trace: > [<ffffffff81089b9b>] worker_thread+0x11b/0x4f0 > [<ffffffff81089a80>] ? init_pwq+0x190/0x190 > [<ffffffff81090ac4>] kthread+0xe4/0x100 > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > [<ffffffff81a4ca2c>] ret_from_fork+0x7c/0xb0 > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > Code: 48 89 e5 41 57 41 56 49 89 f6 41 55 45 31 ed 41 54 53 48 89 fb 48 83 ec 48 48 8b 06 4c 8b 67 48 48 89 c2 30 d2 a8 04 4c 0f 45 ea <49> 8b 45 08 48 c7 45 b8 00 00 00 00 48 c7 45 c0 00 00 00 00 8b > RIP [<ffffffff810890b1>] process_one_work+0x31/0x500 > RSP <ffff88003d9d3d68> > CR2: 0000000000000008 > > and I'm suspicious of this: > > > @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) > > > > cb->cb_ops = &nfsd4_cb_probe_ops; > > > > + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > > + > > run_nfsd4_cb(cb); > > } > > This is called all over the place, possibly multiple times on the same > client, so we're calling INIT_WORK on something that may already be in > use--I doubt that's safe. > > I'm backing out this patch for now. If that's the case all of do_probe_callback seems very fishy to me, as it scrambles all over the callback structure. I guess we need to move more of it to an init function then, and have different init functions for the different callbacks. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jul 09, 2014 at 01:21:21AM -0700, Christoph Hellwig wrote: > On Tue, Jul 08, 2014 at 05:11:34PM -0400, J. Bruce Fields wrote: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > > IP: [<ffffffff810890b1>] process_one_work+0x31/0x500 > > PGD 0 > > Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > > Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc > > CPU: 2 PID: 30 Comm: kworker/u8:1 Not tainted 3.16.0-rc2-00023-g30c1d16 #3051 > > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > > task: ffff88003d9cc810 ti: ffff88003d9d0000 task.ti: ffff88003d9d0000 > > RIP: 0010:[<ffffffff810890b1>] [<ffffffff810890b1>] process_one_work+0x31/0x500 > > RSP: 0018:ffff88003d9d3d68 EFLAGS: 00010046 > > RAX: 0000000000000100 RBX: ffff88003d9caf80 RCX: dead000000200200 > > RDX: 0000000000000100 RSI: ffff880031429b38 RDI: ffff88003d9caf80 > > RBP: ffff88003d9d3dd8 R08: ffff88003dcb7800 R09: 0000000001c40000 > > R10: 0000000000000000 R11: 000000000006b5b0 R12: ffff88003dcb7800 > > R13: 0000000000000000 R14: ffff880031429b38 R15: ffff88003d9caf80 > > FS: 0000000000000000(0000) GS:ffff88003fb00000(0000) knlGS:0000000000000000 > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > CR2: 0000000000000138 CR3: 0000000027744000 CR4: 00000000000006e0 > > Stack: > > 000000003d9d3d88 ffff88003dcb7818 ffff88003d9cafb0 ffff88003dcb7800 > > ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003d9cafb0 > > ffff88003d9d3dd8 ffff88003dcb7800 ffff88003dcb7848 ffff88003d9cafb0 > > Call Trace: > > [<ffffffff81089b9b>] worker_thread+0x11b/0x4f0 > > [<ffffffff81089a80>] ? init_pwq+0x190/0x190 > > [<ffffffff81090ac4>] kthread+0xe4/0x100 > > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > > [<ffffffff81a4ca2c>] ret_from_fork+0x7c/0xb0 > > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > > Code: 48 89 e5 41 57 41 56 49 89 f6 41 55 45 31 ed 41 54 53 48 89 fb 48 83 ec 48 48 8b 06 4c 8b 67 48 48 89 c2 30 d2 a8 04 4c 0f 45 ea <49> 8b 45 08 48 c7 45 b8 00 00 00 00 48 c7 45 c0 00 00 00 00 8b > > RIP [<ffffffff810890b1>] process_one_work+0x31/0x500 > > RSP <ffff88003d9d3d68> > > CR2: 0000000000000008 > > > > and I'm suspicious of this: > > > > > @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) > > > > > > cb->cb_ops = &nfsd4_cb_probe_ops; > > > > > > + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > > > + > > > run_nfsd4_cb(cb); > > > } > > > > This is called all over the place, possibly multiple times on the same > > client, so we're calling INIT_WORK on something that may already be in > > use--I doubt that's safe. > > > > I'm backing out this patch for now. > > If that's the case all of do_probe_callback seems very fishy to me, as > it scrambles all over the callback structure. I guess we need to move > more of it to an init function then, and have different init functions > for the different callbacks. Taking a look.... It does look fishy, but those fields are constant, so I don't see a bug. In delegation recall case (nfsd4_cb_recall()) those fields aren't constant, but we guarantee it's only called once. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 9 Jul 2014 15:41:14 -0400 "J. Bruce Fields" <bfields@fieldses.org> wrote: > On Wed, Jul 09, 2014 at 01:21:21AM -0700, Christoph Hellwig wrote: > > On Tue, Jul 08, 2014 at 05:11:34PM -0400, J. Bruce Fields wrote: > > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > > > IP: [<ffffffff810890b1>] process_one_work+0x31/0x500 > > > PGD 0 > > > Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > > > Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc > > > CPU: 2 PID: 30 Comm: kworker/u8:1 Not tainted 3.16.0-rc2-00023-g30c1d16 #3051 > > > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > > > task: ffff88003d9cc810 ti: ffff88003d9d0000 task.ti: ffff88003d9d0000 > > > RIP: 0010:[<ffffffff810890b1>] [<ffffffff810890b1>] process_one_work+0x31/0x500 > > > RSP: 0018:ffff88003d9d3d68 EFLAGS: 00010046 > > > RAX: 0000000000000100 RBX: ffff88003d9caf80 RCX: dead000000200200 > > > RDX: 0000000000000100 RSI: ffff880031429b38 RDI: ffff88003d9caf80 > > > RBP: ffff88003d9d3dd8 R08: ffff88003dcb7800 R09: 0000000001c40000 > > > R10: 0000000000000000 R11: 000000000006b5b0 R12: ffff88003dcb7800 > > > R13: 0000000000000000 R14: ffff880031429b38 R15: ffff88003d9caf80 > > > FS: 0000000000000000(0000) GS:ffff88003fb00000(0000) knlGS:0000000000000000 > > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > > CR2: 0000000000000138 CR3: 0000000027744000 CR4: 00000000000006e0 > > > Stack: > > > 000000003d9d3d88 ffff88003dcb7818 ffff88003d9cafb0 ffff88003dcb7800 > > > ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003d9cafb0 > > > ffff88003d9d3dd8 ffff88003dcb7800 ffff88003dcb7848 ffff88003d9cafb0 > > > Call Trace: > > > [<ffffffff81089b9b>] worker_thread+0x11b/0x4f0 > > > [<ffffffff81089a80>] ? init_pwq+0x190/0x190 > > > [<ffffffff81090ac4>] kthread+0xe4/0x100 > > > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > > > [<ffffffff81a4ca2c>] ret_from_fork+0x7c/0xb0 > > > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > > > Code: 48 89 e5 41 57 41 56 49 89 f6 41 55 45 31 ed 41 54 53 48 89 fb 48 83 ec 48 48 8b 06 4c 8b 67 48 48 89 c2 30 d2 a8 04 4c 0f 45 ea <49> 8b 45 08 48 c7 45 b8 00 00 00 00 48 c7 45 c0 00 00 00 00 8b > > > RIP [<ffffffff810890b1>] process_one_work+0x31/0x500 > > > RSP <ffff88003d9d3d68> > > > CR2: 0000000000000008 > > > > > > and I'm suspicious of this: > > > > > > > @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) > > > > > > > > cb->cb_ops = &nfsd4_cb_probe_ops; > > > > > > > > + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > > > > + > > > > run_nfsd4_cb(cb); > > > > } > > > > > > This is called all over the place, possibly multiple times on the same > > > client, so we're calling INIT_WORK on something that may already be in > > > use--I doubt that's safe. > > > > > > I'm backing out this patch for now. > > > > If that's the case all of do_probe_callback seems very fishy to me, as > > it scrambles all over the callback structure. I guess we need to move > > more of it to an init function then, and have different init functions > > for the different callbacks. > > Taking a look.... It does look fishy, but those fields are constant, so > I don't see a bug. > > In delegation recall case (nfsd4_cb_recall()) those fields aren't > constant, but we guarantee it's only called once. > Yes. I'm inclined to leave well enough alone on this and to leave any cleanup in this area to Christoph since he said he was overhauling that code anyway. I didn't see where you had reverted the patch in your repo, so I went ahead and did it and then rebased the series on top of the revert. There were some merge conflicts, but I at least was able to get rid of the double INIT_WORK calls in the case of a delegation (and do some related cleanup in this area): http://git.samba.org/?p=jlayton/linux.git;a=commitdiff;h=60b61375d6a84e74bf1c3c7c230712721e14773d http://git.samba.org/?p=jlayton/linux.git;a=commitdiff;h=491192c3e6f0966722c34ba36adfde7575640544 Unfortunately there are a few (fairly trivial) merge conflicts later in the series due to this change. Bruce, do you want me to repost the whole set, or would you rather just cherry-pick them from my updated branch?
On Wed, Jul 09, 2014 at 04:37:44PM -0400, Jeff Layton wrote: > On Wed, 9 Jul 2014 15:41:14 -0400 > "J. Bruce Fields" <bfields@fieldses.org> wrote: > > > On Wed, Jul 09, 2014 at 01:21:21AM -0700, Christoph Hellwig wrote: > > > On Tue, Jul 08, 2014 at 05:11:34PM -0400, J. Bruce Fields wrote: > > > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > > > > IP: [<ffffffff810890b1>] process_one_work+0x31/0x500 > > > > PGD 0 > > > > Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC > > > > Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl lockd sunrpc > > > > CPU: 2 PID: 30 Comm: kworker/u8:1 Not tainted 3.16.0-rc2-00023-g30c1d16 #3051 > > > > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > > > > task: ffff88003d9cc810 ti: ffff88003d9d0000 task.ti: ffff88003d9d0000 > > > > RIP: 0010:[<ffffffff810890b1>] [<ffffffff810890b1>] process_one_work+0x31/0x500 > > > > RSP: 0018:ffff88003d9d3d68 EFLAGS: 00010046 > > > > RAX: 0000000000000100 RBX: ffff88003d9caf80 RCX: dead000000200200 > > > > RDX: 0000000000000100 RSI: ffff880031429b38 RDI: ffff88003d9caf80 > > > > RBP: ffff88003d9d3dd8 R08: ffff88003dcb7800 R09: 0000000001c40000 > > > > R10: 0000000000000000 R11: 000000000006b5b0 R12: ffff88003dcb7800 > > > > R13: 0000000000000000 R14: ffff880031429b38 R15: ffff88003d9caf80 > > > > FS: 0000000000000000(0000) GS:ffff88003fb00000(0000) knlGS:0000000000000000 > > > > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > > > > CR2: 0000000000000138 CR3: 0000000027744000 CR4: 00000000000006e0 > > > > Stack: > > > > 000000003d9d3d88 ffff88003dcb7818 ffff88003d9cafb0 ffff88003dcb7800 > > > > ffff88003dcb7800 ffff88003dcb7800 ffff88003dcb7800 ffff88003d9cafb0 > > > > ffff88003d9d3dd8 ffff88003dcb7800 ffff88003dcb7848 ffff88003d9cafb0 > > > > Call Trace: > > > > [<ffffffff81089b9b>] worker_thread+0x11b/0x4f0 > > > > [<ffffffff81089a80>] ? init_pwq+0x190/0x190 > > > > [<ffffffff81090ac4>] kthread+0xe4/0x100 > > > > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > > > > [<ffffffff81a4ca2c>] ret_from_fork+0x7c/0xb0 > > > > [<ffffffff810909e0>] ? __init_kthread_worker+0x70/0x70 > > > > Code: 48 89 e5 41 57 41 56 49 89 f6 41 55 45 31 ed 41 54 53 48 89 fb 48 83 ec 48 48 8b 06 4c 8b 67 48 48 89 c2 30 d2 a8 04 4c 0f 45 ea <49> 8b 45 08 48 c7 45 b8 00 00 00 00 48 c7 45 c0 00 00 00 00 8b > > > > RIP [<ffffffff810890b1>] process_one_work+0x31/0x500 > > > > RSP <ffff88003d9d3d68> > > > > CR2: 0000000000000008 > > > > > > > > and I'm suspicious of this: > > > > > > > > > @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) > > > > > > > > > > cb->cb_ops = &nfsd4_cb_probe_ops; > > > > > > > > > > + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); > > > > > + > > > > > run_nfsd4_cb(cb); > > > > > } > > > > > > > > This is called all over the place, possibly multiple times on the same > > > > client, so we're calling INIT_WORK on something that may already be in > > > > use--I doubt that's safe. > > > > > > > > I'm backing out this patch for now. > > > > > > If that's the case all of do_probe_callback seems very fishy to me, as > > > it scrambles all over the callback structure. I guess we need to move > > > more of it to an init function then, and have different init functions > > > for the different callbacks. > > > > Taking a look.... It does look fishy, but those fields are constant, so > > I don't see a bug. > > > > In delegation recall case (nfsd4_cb_recall()) those fields aren't > > constant, but we guarantee it's only called once. > > > > Yes. I'm inclined to leave well enough alone on this and to leave any > cleanup in this area to Christoph since he said he was overhauling that > code anyway. > > I didn't see where you had reverted the patch in your repo, Yeah, reverted locally but I had a testing problem that's kept me from pushing it out yet. > so I went > ahead and did it and then rebased the series on top of the revert. > There were some merge conflicts, but I at least was able to get rid of > the double INIT_WORK calls in the case of a delegation (and do some > related cleanup in this area): > > http://git.samba.org/?p=jlayton/linux.git;a=commitdiff;h=60b61375d6a84e74bf1c3c7c230712721e14773d > http://git.samba.org/?p=jlayton/linux.git;a=commitdiff;h=491192c3e6f0966722c34ba36adfde7575640544 > > Unfortunately there are a few (fairly trivial) merge conflicts later in > the series due to this change. Bruce, do you want me to repost the > whole set, or would you rather just cherry-pick them from my updated > branch? Let's spare everyone a reposting and I'll see how far I can get with fixing up the conflicts myself and/or cherry-picking from your updated branch. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jul 09, 2014 at 04:37:44PM -0400, Jeff Layton wrote: > I didn't see where you had reverted the patch in your repo, so I went > ahead and did it and then rebased the series on top of the revert. > There were some merge conflicts, but I at least was able to get rid of > the double INIT_WORK calls in the case of a delegation (and do some > related cleanup in this area): This looks reasonable to me. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Unfortunately there are a few (fairly trivial) merge conflicts later in > the series due to this change. Bruce, do you want me to repost the > whole set, or would you rather just cherry-pick them from my updated > branch? Can you resend just the whole fi_fds and access/deny mode patches as a series for the next step? This seems useful and complicated enough to do a standalone review. I also don't think the additional few spinlocks would have enough performance impact to avoid them until the big client lock is gone, although all those logic changes could probably be done easily enough without the new locking if someone cared enough (I don't). -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 10 Jul 2014 03:53:43 -0700 Christoph Hellwig <hch@infradead.org> wrote: > > Unfortunately there are a few (fairly trivial) merge conflicts later in > > the series due to this change. Bruce, do you want me to repost the > > whole set, or would you rather just cherry-pick them from my updated > > branch? > > Can you resend just the whole fi_fds and access/deny mode patches as a > series for the next step? This seems useful and complicated enough to > do a standalone review. I also don't think the additional few spinlocks > would have enough performance impact to avoid them until the big client > lock is gone, although all those logic changes could probably be done > easily enough without the new locking if someone cared enough (I don't). > Yes, that sounds reasonable. I'll go ahead and move the patches necessary for it to the front of the series and address your comments and we can see about getting them merged ahead of the rest. I agree that the extra spinlocking probably won't matter while the client_mutex is in play. If it does, then not needing to walk the whole stateids list in order to do deny mode enforcement may help make up for it.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2c73cae9899d..00cb9b7a75f6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -43,6 +43,7 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); +static void nfsd4_do_callback_rpc(struct work_struct *w); #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 @@ -763,6 +764,8 @@ static void do_probe_callback(struct nfs4_client *clp) cb->cb_ops = &nfsd4_cb_probe_ops; + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); + run_nfsd4_cb(cb); } @@ -1031,11 +1034,6 @@ static void nfsd4_do_callback_rpc(struct work_struct *w) cb->cb_ops, cb); } -void nfsd4_init_callback(struct nfsd4_callback *cb) -{ - INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); -} - void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; @@ -1053,5 +1051,7 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) INIT_LIST_HEAD(&cb->cb_per_client); cb->cb_done = true; + INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); + run_nfsd4_cb(&dp->dl_recall); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 29788fd0da24..b49b46b0ce23 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -466,7 +466,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - nfsd4_init_callback(&dp->dl_recall); return dp; } @@ -1472,7 +1471,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 374c66283ac5..9447f86f2778 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -464,7 +464,6 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); extern int set_callback_cred(void); -extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);