Message ID | 1415833444-48129-1-git-send-email-trond.myklebust@primarydata.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, 12 Nov 2014 18:04:04 -0500 Trond Myklebust <trond.myklebust@primarydata.com> wrote: > Both xprt_lookup_rqst() and xprt_complete_rqst() require that you > take the transport lock in order to avoid races with xprt_transmit(). > > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> > Cc: Bruce Fields <bfields@fieldses.org> > --- > net/sunrpc/svcsock.c | 27 ++++++++++++++++----------- > 1 file changed, 16 insertions(+), 11 deletions(-) > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c > index 3f959c681885..f9c052d508f0 100644 > --- a/net/sunrpc/svcsock.c > +++ b/net/sunrpc/svcsock.c > @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) > xid = *p++; > calldir = *p; > > - if (bc_xprt) > - req = xprt_lookup_rqst(bc_xprt, xid); > - > - if (!req) { > - printk(KERN_NOTICE > - "%s: Got unrecognized reply: " > - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", > - __func__, ntohl(calldir), > - bc_xprt, ntohl(xid)); > + if (!bc_xprt) > return -EAGAIN; > - } > + spin_lock_bh(&bc_xprt->transport_lock); > + req = xprt_lookup_rqst(bc_xprt, xid); > + if (!req) > + goto unlock_notfound; > > memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); > /* > @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) > dst = &req->rq_private_buf.head[0]; > src = &rqstp->rq_arg.head[0]; > if (dst->iov_len < src->iov_len) > - return -EAGAIN; /* whatever; just giving up. */ > + goto unlock_eagain; /* whatever; just giving up. */ > memcpy(dst->iov_base, src->iov_base, src->iov_len); > xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); > rqstp->rq_arg.len = 0; > + spin_unlock_bh(&bc_xprt->transport_lock); > return 0; > +unlock_notfound: > + printk(KERN_NOTICE > + "%s: Got unrecognized reply: " > + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", > + __func__, ntohl(calldir), > + bc_xprt, ntohl(xid)); > +unlock_eagain: > + spin_unlock_bh(&bc_xprt->transport_lock); > + return -EAGAIN; > } > > static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) Nice catch. It would also be good to pair this with a lockdep_assert_held() call in xprt_lookup_rqst, but that could be added in a separate patch. Reviewed-by: Jeff Layton <jlayton@primarydata.com> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Nov 12, 2014 at 09:41:28PM -0500, Jeff Layton wrote: > On Wed, 12 Nov 2014 18:04:04 -0500 > Trond Myklebust <trond.myklebust@primarydata.com> wrote: > > > Both xprt_lookup_rqst() and xprt_complete_rqst() require that you > > take the transport lock in order to avoid races with xprt_transmit(). Thanks, looks right. Have you seen this in practice? (I'm just wondering whether it's worth a stable cc:.) --b. > > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> > > Cc: Bruce Fields <bfields@fieldses.org> > > --- > > net/sunrpc/svcsock.c | 27 ++++++++++++++++----------- > > 1 file changed, 16 insertions(+), 11 deletions(-) > > > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c > > index 3f959c681885..f9c052d508f0 100644 > > --- a/net/sunrpc/svcsock.c > > +++ b/net/sunrpc/svcsock.c > > @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) > > xid = *p++; > > calldir = *p; > > > > - if (bc_xprt) > > - req = xprt_lookup_rqst(bc_xprt, xid); > > - > > - if (!req) { > > - printk(KERN_NOTICE > > - "%s: Got unrecognized reply: " > > - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", > > - __func__, ntohl(calldir), > > - bc_xprt, ntohl(xid)); > > + if (!bc_xprt) > > return -EAGAIN; > > - } > > + spin_lock_bh(&bc_xprt->transport_lock); > > + req = xprt_lookup_rqst(bc_xprt, xid); > > + if (!req) > > + goto unlock_notfound; > > > > memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); > > /* > > @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) > > dst = &req->rq_private_buf.head[0]; > > src = &rqstp->rq_arg.head[0]; > > if (dst->iov_len < src->iov_len) > > - return -EAGAIN; /* whatever; just giving up. */ > > + goto unlock_eagain; /* whatever; just giving up. */ > > memcpy(dst->iov_base, src->iov_base, src->iov_len); > > xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); > > rqstp->rq_arg.len = 0; > > + spin_unlock_bh(&bc_xprt->transport_lock); > > return 0; > > +unlock_notfound: > > + printk(KERN_NOTICE > > + "%s: Got unrecognized reply: " > > + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", > > + __func__, ntohl(calldir), > > + bc_xprt, ntohl(xid)); > > +unlock_eagain: > > + spin_unlock_bh(&bc_xprt->transport_lock); > > + return -EAGAIN; > > } > > > > static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) > > Nice catch. It would also be good to pair this with a > lockdep_assert_held() call in xprt_lookup_rqst, but that could be added > in a separate patch. > > Reviewed-by: Jeff Layton <jlayton@primarydata.com> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Nov 18, 2014, at 3:10 PM, Bruce Fields <bfields@fieldses.org> wrote: > On Wed, Nov 12, 2014 at 09:41:28PM -0500, Jeff Layton wrote: >> On Wed, 12 Nov 2014 18:04:04 -0500 >> Trond Myklebust <trond.myklebust@primarydata.com> wrote: >> >>> Both xprt_lookup_rqst() and xprt_complete_rqst() require that you >>> take the transport lock in order to avoid races with xprt_transmit(). > > Thanks, looks right. > > Have you seen this in practice? (I'm just wondering whether it's worth > a stable cc:.) Since the backchannel has a single slot, I wonder if it would be possible to race here. > --b. > >>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> >>> Cc: Bruce Fields <bfields@fieldses.org> >>> --- >>> net/sunrpc/svcsock.c | 27 ++++++++++++++++----------- >>> 1 file changed, 16 insertions(+), 11 deletions(-) >>> >>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >>> index 3f959c681885..f9c052d508f0 100644 >>> --- a/net/sunrpc/svcsock.c >>> +++ b/net/sunrpc/svcsock.c >>> @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) >>> xid = *p++; >>> calldir = *p; >>> >>> - if (bc_xprt) >>> - req = xprt_lookup_rqst(bc_xprt, xid); >>> - >>> - if (!req) { >>> - printk(KERN_NOTICE >>> - "%s: Got unrecognized reply: " >>> - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", >>> - __func__, ntohl(calldir), >>> - bc_xprt, ntohl(xid)); >>> + if (!bc_xprt) >>> return -EAGAIN; >>> - } >>> + spin_lock_bh(&bc_xprt->transport_lock); >>> + req = xprt_lookup_rqst(bc_xprt, xid); >>> + if (!req) >>> + goto unlock_notfound; >>> >>> memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); >>> /* >>> @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) >>> dst = &req->rq_private_buf.head[0]; >>> src = &rqstp->rq_arg.head[0]; >>> if (dst->iov_len < src->iov_len) >>> - return -EAGAIN; /* whatever; just giving up. */ >>> + goto unlock_eagain; /* whatever; just giving up. */ >>> memcpy(dst->iov_base, src->iov_base, src->iov_len); >>> xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); >>> rqstp->rq_arg.len = 0; >>> + spin_unlock_bh(&bc_xprt->transport_lock); >>> return 0; >>> +unlock_notfound: >>> + printk(KERN_NOTICE >>> + "%s: Got unrecognized reply: " >>> + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", >>> + __func__, ntohl(calldir), >>> + bc_xprt, ntohl(xid)); >>> +unlock_eagain: >>> + spin_unlock_bh(&bc_xprt->transport_lock); >>> + return -EAGAIN; >>> } >>> >>> static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) >> >> Nice catch. It would also be good to pair this with a >> lockdep_assert_held() call in xprt_lookup_rqst, but that could be added >> in a separate patch. >> >> Reviewed-by: Jeff Layton <jlayton@primarydata.com> > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Chuck Lever chuck[dot]lever[at]oracle[dot]com -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Nov 18, 2014 at 12:10 PM, Bruce Fields <bfields@fieldses.org> wrote: > On Wed, Nov 12, 2014 at 09:41:28PM -0500, Jeff Layton wrote: >> On Wed, 12 Nov 2014 18:04:04 -0500 >> Trond Myklebust <trond.myklebust@primarydata.com> wrote: >> >> > Both xprt_lookup_rqst() and xprt_complete_rqst() require that you >> > take the transport lock in order to avoid races with xprt_transmit(). > > Thanks, looks right. > > Have you seen this in practice? (I'm just wondering whether it's worth > a stable cc:.) We have a candidate Oops that shows corruption in that list in the backchannel path. I cannot guarantee that the patch is a full fix, but I believe that the race between transmit and receive is real. > --b. > >> > Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> >> > Cc: Bruce Fields <bfields@fieldses.org> >> > --- >> > net/sunrpc/svcsock.c | 27 ++++++++++++++++----------- >> > 1 file changed, 16 insertions(+), 11 deletions(-) >> > >> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >> > index 3f959c681885..f9c052d508f0 100644 >> > --- a/net/sunrpc/svcsock.c >> > +++ b/net/sunrpc/svcsock.c >> > @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) >> > xid = *p++; >> > calldir = *p; >> > >> > - if (bc_xprt) >> > - req = xprt_lookup_rqst(bc_xprt, xid); >> > - >> > - if (!req) { >> > - printk(KERN_NOTICE >> > - "%s: Got unrecognized reply: " >> > - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", >> > - __func__, ntohl(calldir), >> > - bc_xprt, ntohl(xid)); >> > + if (!bc_xprt) >> > return -EAGAIN; >> > - } >> > + spin_lock_bh(&bc_xprt->transport_lock); >> > + req = xprt_lookup_rqst(bc_xprt, xid); >> > + if (!req) >> > + goto unlock_notfound; >> > >> > memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); >> > /* >> > @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) >> > dst = &req->rq_private_buf.head[0]; >> > src = &rqstp->rq_arg.head[0]; >> > if (dst->iov_len < src->iov_len) >> > - return -EAGAIN; /* whatever; just giving up. */ >> > + goto unlock_eagain; /* whatever; just giving up. */ >> > memcpy(dst->iov_base, src->iov_base, src->iov_len); >> > xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); >> > rqstp->rq_arg.len = 0; >> > + spin_unlock_bh(&bc_xprt->transport_lock); >> > return 0; >> > +unlock_notfound: >> > + printk(KERN_NOTICE >> > + "%s: Got unrecognized reply: " >> > + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", >> > + __func__, ntohl(calldir), >> > + bc_xprt, ntohl(xid)); >> > +unlock_eagain: >> > + spin_unlock_bh(&bc_xprt->transport_lock); >> > + return -EAGAIN; >> > } >> > >> > static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) >> >> Nice catch. It would also be good to pair this with a >> lockdep_assert_held() call in xprt_lookup_rqst, but that could be added >> in a separate patch. >> >> Reviewed-by: Jeff Layton <jlayton@primarydata.com>
On Tue, Nov 18, 2014 at 12:14 PM, Chuck Lever <chuck.lever@oracle.com> wrote: > > On Nov 18, 2014, at 3:10 PM, Bruce Fields <bfields@fieldses.org> wrote: > >> On Wed, Nov 12, 2014 at 09:41:28PM -0500, Jeff Layton wrote: >>> On Wed, 12 Nov 2014 18:04:04 -0500 >>> Trond Myklebust <trond.myklebust@primarydata.com> wrote: >>> >>>> Both xprt_lookup_rqst() and xprt_complete_rqst() require that you >>>> take the transport lock in order to avoid races with xprt_transmit(). >> >> Thanks, looks right. >> >> Have you seen this in practice? (I'm just wondering whether it's worth >> a stable cc:.) > > Since the backchannel has a single slot, I wonder if it > would be possible to race here. You would think not, but AFAICS the back channel code uses a soft mount with a timeout of lease_period/10. In case of a timeout, the slot is just released and the next request is queued. IOW: I believe that it is perfectly possible for the client to be a little late responding to the callback, and then to have the reply there race with the timeout. Cheers Trond >> --b. >> >>>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> >>>> Cc: Bruce Fields <bfields@fieldses.org> >>>> --- >>>> net/sunrpc/svcsock.c | 27 ++++++++++++++++----------- >>>> 1 file changed, 16 insertions(+), 11 deletions(-) >>>> >>>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >>>> index 3f959c681885..f9c052d508f0 100644 >>>> --- a/net/sunrpc/svcsock.c >>>> +++ b/net/sunrpc/svcsock.c >>>> @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) >>>> xid = *p++; >>>> calldir = *p; >>>> >>>> - if (bc_xprt) >>>> - req = xprt_lookup_rqst(bc_xprt, xid); >>>> - >>>> - if (!req) { >>>> - printk(KERN_NOTICE >>>> - "%s: Got unrecognized reply: " >>>> - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", >>>> - __func__, ntohl(calldir), >>>> - bc_xprt, ntohl(xid)); >>>> + if (!bc_xprt) >>>> return -EAGAIN; >>>> - } >>>> + spin_lock_bh(&bc_xprt->transport_lock); >>>> + req = xprt_lookup_rqst(bc_xprt, xid); >>>> + if (!req) >>>> + goto unlock_notfound; >>>> >>>> memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); >>>> /* >>>> @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) >>>> dst = &req->rq_private_buf.head[0]; >>>> src = &rqstp->rq_arg.head[0]; >>>> if (dst->iov_len < src->iov_len) >>>> - return -EAGAIN; /* whatever; just giving up. */ >>>> + goto unlock_eagain; /* whatever; just giving up. */ >>>> memcpy(dst->iov_base, src->iov_base, src->iov_len); >>>> xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); >>>> rqstp->rq_arg.len = 0; >>>> + spin_unlock_bh(&bc_xprt->transport_lock); >>>> return 0; >>>> +unlock_notfound: >>>> + printk(KERN_NOTICE >>>> + "%s: Got unrecognized reply: " >>>> + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", >>>> + __func__, ntohl(calldir), >>>> + bc_xprt, ntohl(xid)); >>>> +unlock_eagain: >>>> + spin_unlock_bh(&bc_xprt->transport_lock); >>>> + return -EAGAIN; >>>> } >>>> >>>> static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) >>> >>> Nice catch. It would also be good to pair this with a >>> lockdep_assert_held() call in xprt_lookup_rqst, but that could be added >>> in a separate patch. >>> >>> Reviewed-by: Jeff Layton <jlayton@primarydata.com> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > Chuck Lever > chuck[dot]lever[at]oracle[dot]com > > >
On Tue, Nov 18, 2014 at 3:02 PM, Trond Myklebust <trond.myklebust@primarydata.com> wrote: > On Tue, Nov 18, 2014 at 12:14 PM, Chuck Lever <chuck.lever@oracle.com> wrote: >> >> On Nov 18, 2014, at 3:10 PM, Bruce Fields <bfields@fieldses.org> wrote: >> >>> On Wed, Nov 12, 2014 at 09:41:28PM -0500, Jeff Layton wrote: >>>> On Wed, 12 Nov 2014 18:04:04 -0500 >>>> Trond Myklebust <trond.myklebust@primarydata.com> wrote: >>>> >>>>> Both xprt_lookup_rqst() and xprt_complete_rqst() require that you >>>>> take the transport lock in order to avoid races with xprt_transmit(). >>> >>> Thanks, looks right. >>> >>> Have you seen this in practice? (I'm just wondering whether it's worth >>> a stable cc:.) >> >> Since the backchannel has a single slot, I wonder if it >> would be possible to race here. > > You would think not, but AFAICS the back channel code uses a soft > mount with a timeout of lease_period/10. In case of a timeout, the > slot is just released and the next request is queued. > > IOW: I believe that it is perfectly possible for the client to be a > little late responding to the callback, and then to have the reply > there race with the timeout. BTW, Bruce, I also noticed a little race condition in nfsd41_cb_get_slot(). The call to rpc_sleep_on() happens after the test_and_set_bit(), with no locking so it is quite possible to race with the clear_bit+ rpc_wake_up. If we want to do this in a lockless fashion, then we would need to re-test_and_set_bit() in nfsd41_cb_get_slot after the sleep...
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3f959c681885..f9c052d508f0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1019,17 +1019,12 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) xid = *p++; calldir = *p; - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, ntohl(xid)); + if (!bc_xprt) return -EAGAIN; - } + spin_lock_bh(&bc_xprt->transport_lock); + req = xprt_lookup_rqst(bc_xprt, xid); + if (!req) + goto unlock_notfound; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); /* @@ -1040,11 +1035,21 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) dst = &req->rq_private_buf.head[0]; src = &rqstp->rq_arg.head[0]; if (dst->iov_len < src->iov_len) - return -EAGAIN; /* whatever; just giving up. */ + goto unlock_eagain; /* whatever; just giving up. */ memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; + spin_unlock_bh(&bc_xprt->transport_lock); return 0; +unlock_notfound: + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, ntohl(xid)); +unlock_eagain: + spin_unlock_bh(&bc_xprt->transport_lock); + return -EAGAIN; } static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
Both xprt_lookup_rqst() and xprt_complete_rqst() require that you take the transport lock in order to avoid races with xprt_transmit(). Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Cc: Bruce Fields <bfields@fieldses.org> --- net/sunrpc/svcsock.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-)