diff mbox

NFS over RDMA crashing

Message ID 531B47B3.1070503@opengridcomputing.com (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Wise March 8, 2014, 4:39 p.m. UTC
On 3/7/2014 2:41 PM, Steve Wise wrote:
>>> Does this help?
>>>
>>> They must have added this for some reason, but I'm not seeing how it
>>> could have ever done anything....
>>>
>>> --b.
>>>
>>> diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>>> b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>>> index 0ce7552..e8f25ec 100644
>>> --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>>> +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
>>> @@ -520,13 +520,6 @@ next_sge:
>>>   	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages;
>>> ch_no++)
>>>   		rqstp->rq_pages[ch_no] = NULL;
>>>
>>> -	/*
>>> -	 * Detach res pages. If svc_release sees any it will attempt to
>>> -	 * put them.
>>> -	 */
>>> -	while (rqstp->rq_next_page != rqstp->rq_respages)
>>> -		*(--rqstp->rq_next_page) = NULL;
>>> -
>>>   	return err;
>>>   }
>>>
>> I can reproduce this server crash readily on a recent net-next tree.
> I
>> added the above change, and see a different crash:
>>
>> [  192.764773] BUG: unable to handle kernel paging request at
>> 0000100000000000
>> [  192.765688] IP: [<ffffffff8113c159>] put_page+0x9/0x50
>> [  192.765688] PGD 0
>> [  192.765688] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
>> [  192.765688] Modules linked in: nfsd lockd nfs_acl exportfs
>> auth_rpcgss oid_registry svcrdma tg3 ip6table_filter ip6_tables
>> ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state
>> nf_conntrack ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter
>> ip_tables bridge stp llc autofs4 sunrpc rdma_ucm rdma_cm iw_cm
> ib_ipoib
>> ib_cm ib_uverbs ib_umad iw_nes libcrc32c iw_cxgb4 iw_cxgb3 cxgb3 mdio
>> ib_qib dca mlx4_en ib_mthca vhost_net macvtap macvlan vhost tun
>> kvm_intel kvm uinput ipmi_si ipmi_msghandler iTCO_wdt
>> iTCO_vendor_support dcdbas sg microcode pcspkr mlx4_ib ib_sa serio_raw
>> ib_mad ib_core ib_addr ipv6 ptp pps_core lpc_ich mfd_core i5100_edac
>> edac_core mlx4_core cxgb4 ext4 jbd2 mbcache sd_mod crc_t10dif
>> crct10dif_common sr_mod cdrom pata_acpi ata_generic ata_piix radeon
>> ttm
>> drm_kms_helper drm i2c_algo_bit
>> [  192.765688]  i2c_core dm_mirror dm_region_hash dm_log dm_mod
>> [last
>> unloaded: tg3]
>> [  192.765688] CPU: 1 PID: 6590 Comm: nfsd Not tainted
>> 3.14.0-rc3-pending+ #5
>> [  192.765688] Hardware name: Dell Inc. PowerEdge R300/0TY179, BIOS
>> 1.3.0 08/15/2008
>> [  192.765688] task: ffff8800b75c62c0 ti: ffff8801faa4a000 task.ti:
>> ffff8801faa4a000
>> [  192.765688] RIP: 0010:[<ffffffff8113c159>]  [<ffffffff8113c159>]
>> put_page+0x9/0x50
>> [  192.765688] RSP: 0018:ffff8801faa4be28  EFLAGS: 00010206
>> [  192.765688] RAX: ffff8801fa9542a8 RBX: ffff8801fa954000 RCX:
>> 0000000000000001
>> [  192.765688] RDX: ffff8801fa953e10 RSI: 0000000000000200 RDI:
>> 0000100000000000
>> [  192.765688] RBP: ffff8801faa4be28 R08: 000000009b8d39b9 R09:
>> 0000000000000017
>> [  192.765688] R10: 0000000000000000 R11: 0000000000000000 R12:
>> ffff8800cb2e7c00
>> [  192.765688] R13: ffff8801fa954210 R14: 0000000000000000 R15:
>> 0000000000000000
>> [  192.765688] FS:  0000000000000000(0000) GS:ffff88022ec80000(0000)
>> knlGS:0000000000000000
>> [  192.765688] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>> [  192.765688] CR2: 0000100000000000 CR3: 00000000b9a5a000 CR4:
>> 00000000000007e0
>> [  192.765688] Stack:
>> [  192.765688]  ffff8801faa4be58 ffffffffa0881f4e ffff880204dd0e00
>> ffff8801fa954000
>> [  192.765688]  ffff880204dd0e00 ffff8800cb2e7c00 ffff8801faa4be88
>> ffffffffa08825f5
>> [  192.765688]  ffff8801fa954000 ffff8800b75c62c0 ffffffff81ae5ac0
>> ffffffffa08cf930
>> [  192.765688] Call Trace:
>> [  192.765688]  [<ffffffffa0881f4e>] svc_xprt_release+0x6e/0xf0
> [sunrpc]
>> [  192.765688]  [<ffffffffa08825f5>] svc_recv+0x165/0x190 [sunrpc]
>> [  192.765688]  [<ffffffffa08cf930>] ?
> nfsd_pool_stats_release+0x60/0x60
>> [nfsd]
>> [  192.765688]  [<ffffffffa08cf9e5>] nfsd+0xb5/0x160 [nfsd]
>> [  192.765688]  [<ffffffffa08cf930>] ?
> nfsd_pool_stats_release+0x60/0x60
>> [nfsd]
>> [  192.765688]  [<ffffffff8107471e>] kthread+0xce/0xf0
>> [  192.765688]  [<ffffffff81074650>] ?
>> kthread_freezable_should_stop+0x70/0x70
>> [  192.765688]  [<ffffffff81584e2c>] ret_from_fork+0x7c/0xb0
>> [  192.765688]  [<ffffffff81074650>] ?
>> kthread_freezable_should_stop+0x70/0x70
>> [  192.765688] Code: 8d 7b 10 e8 ea fa ff ff 48 c7 03 00 00 00 00 48
> 83
>> c4 08 5b c9 c3 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 66 66
> 66
>> 66 90 <66> f7 07 00 c0 75 32 8b 47 1c 48 8d 57 1c 85 c0 74 1c f0 ff 0a
>> [  192.765688] RIP  [<ffffffff8113c159>] put_page+0x9/0x50
>> [  192.765688]  RSP <ffff8801faa4be28>
>> [  192.765688] CR2: 0000100000000000
>> crash>
> This new crash is here calling put_page() on garbage I guess:
>
> static inline void svc_free_res_pages(struct svc_rqst *rqstp)
> {
>          while (rqstp->rq_next_page != rqstp->rq_respages) {
>                  struct page **pp = --rqstp->rq_next_page;
>                  if (*pp) {
>                          put_page(*pp);
>                          *pp = NULL;
>                  }
>          }
> }
>   

I removed your change and started debugging the original crash that happens 
on top-of-tree.  It seems like rq_next_page is screwed up.  It should 
always be >= rq_respages, yes?  I added a BUG_ON() to assert this in 
rdma_read_xdr(), and we hit the BUG_ON().  Look:

crash> svc_rqst.rq_next_page 0xffff8800b84e6000
   rq_next_page = 0xffff8800b84e6228
crash> svc_rqst.rq_respages 0xffff8800b84e6000
   rq_respages = 0xffff8800b84e62a8

Any ideas Bruce/Tom?

Here are the BUG_ON()s I added (shown in the Patch section at the end of this message):



Here's the stack:

Backtrace:
#   0: [RSP: 0xffff88020540d970, RIP: 0xffffffff8103c994] machine_kexec 
(struct kimage * arg = 0xffff880223b26c00)
#   1: [RSP: 0xffff88020540da40, RIP: 0xffffffff810d1e98] crash_kexec 
(struct pt_regs * arg = 0xffff88020540dba8)
#   2: [RSP: 0xffff88020540da70, RIP: 0xffffffff8157d650] oops_end 
(unsigned long arg = 0x296, struct pt_regs * arg = 0xffff88020540dba8, 
int arg = 0x1)
#   3: [RSP: 0xffff88020540daa0, RIP: 0xffffffff810072fb] die (const 
char * arg = 0xffffffff817d2d0a, struct pt_regs * arg = 
0xffff88020540dba8, long arg = 0x0)
#   4: [RSP: 0xffff88020540db00, RIP: 0xffffffff8157d19b] do_trap (int 
arg = 0x6, int arg = 0x4, char * arg = 0xffffffff817d2d0a, struct 
pt_regs * arg = 0xffff88020540dba8, long arg = 0x0, siginfo_t * arg = 
0xffff88020540db08)
#   5: [RSP: 0xffff88020540dba0, RIP: 0xffffffff81004555] do_invalid_op 
(struct pt_regs * arg = 0xffff88020540dba8, long arg = 0x0)
#   6: [RSP: 0xffff88020540dc50, RIP: 0xffffffff815863c8] invalid_op (void)
#   7: [RSP: 0xffff88020540ddc0, RIP: 0xffffffffa0521b93] rdma_read_xdr 
(struct svcxprt_rdma * arg = 0xffff88022273bc00, struct rpcrdma_msg * 
arg = 0xffff88020b6a6000, struct svc_rqst * arg = 0xffff8800b84e6000, 
struct svc_rdma_op_ctxt * arg = 0xffff88022397e300)
#   8: [RSP: 0xffff88020540de20, RIP: 0xffffffffa0521da4] 
svc_rdma_recvfrom (struct svc_rqst * arg = 0xffff8800b84e6000)
#   9: [RSP: 0xffff88020540de60, RIP: 0xffffffffa0881bca] 
svc_handle_xprt (struct svc_rqst * arg = 0xffff8800b84e6000, struct 
svc_xprt * arg = unknown)
#  10: [RSP: 0xffff88020540de90, RIP: 0xffffffffa0882577] svc_recv 
(struct svc_rqst * arg = 0xffff8800b84e6000, long arg = 0x36ee80)
crash> svc_rqst.rq_next_page 0xffff8800b84e6000
   rq_next_page = 0xffff8800b84e6228
crash> svc_rqst.rq_respages 0xffff8800b84e6000
   rq_respages = 0xffff8800b84e62a8


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e7632..ab91905 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -339,6 +339,7 @@  xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)

  static inline void svc_free_res_pages(struct svc_rqst *rqstp)
  {
+       BUG_ON((unsigned long)rqstp->rq_next_page < (unsigned 
long)rqstp->rq_respages);
         while (rqstp->rq_next_page != rqstp->rq_respages) {
                 struct page **pp = --rqstp->rq_next_page;
                 if (*pp) {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 
b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0ce7552..fa49d40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -524,6 +524,7 @@  next_sge:
          * Detach res pages. If svc_release sees any it will attempt to
          * put them.
          */
+       BUG_ON((unsigned long)rqstp->rq_next_page < (unsigned 
long)rqstp->rq_respages);
         while (rqstp->rq_next_page != rqstp->rq_respages)
                 *(--rqstp->rq_next_page) = NULL;