Message ID | 20180220164229.65404-11-kolga@netapp.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Feb 20, 2018 at 11:42:29AM -0500, Olga Kornievskaia wrote: > If client is shutting down and there are still async copies going > on, then stop queued async copies. > > Signed-off-by: Olga Kornievskaia <kolga@netapp.com> > --- > fs/nfsd/nfs4proc.c | 13 +++++++++++++ > fs/nfsd/nfs4state.c | 1 + > fs/nfsd/state.h | 1 + > 3 files changed, 15 insertions(+) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 57d0daa..54039e4 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1096,6 +1096,19 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) > kfree(copy); > } > > +void nfsd4_shutdown_copy(struct nfs4_client *clp) > +{ > + struct nfsd4_copy *copy; > + > + spin_lock(&clp->async_lock); > + list_for_each_entry(copy, &clp->async_copies, copies) { > + set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING); > + kthread_stop(copy->copy_task); > + nfs4_put_copy(copy); Those three lines could go in a common helper to be called here and from nfsd4_offload_cancel(). --b. > + } > + spin_unlock(&clp->async_lock); > +} > + > static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) > { > struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index bd76bd1..f9f2383 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -1965,6 +1965,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) > release_openowner(oo); > } > nfsd4_return_all_client_layouts(clp); > + nfsd4_shutdown_copy(clp); > nfsd4_shutdown_callback(clp); > if (clp->cl_cb_conn.cb_xprt) > svc_xprt_put(clp->cl_cb_conn.cb_xprt); > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 49709d1..131aefa 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -646,6 +646,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, > extern int nfsd4_create_callback_queue(void); > extern void nfsd4_destroy_callback_queue(void); > extern void nfsd4_shutdown_callback(struct nfs4_client *); > +extern void nfsd4_shutdown_copy(struct nfs4_client *clp); > extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); > extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, > struct nfsd_net *nn); > -- > 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Mar 8, 2018 at 12:05 PM, J. Bruce Fields <bfields@fieldses.org> wrote: > On Tue, Feb 20, 2018 at 11:42:29AM -0500, Olga Kornievskaia wrote: >> If client is shutting down and there are still async copies going >> on, then stop queued async copies. >> >> Signed-off-by: Olga Kornievskaia <kolga@netapp.com> >> --- >> fs/nfsd/nfs4proc.c | 13 +++++++++++++ >> fs/nfsd/nfs4state.c | 1 + >> fs/nfsd/state.h | 1 + >> 3 files changed, 15 insertions(+) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 57d0daa..54039e4 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -1096,6 +1096,19 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) >> kfree(copy); >> } >> >> +void nfsd4_shutdown_copy(struct nfs4_client *clp) >> +{ >> + struct nfsd4_copy *copy; >> + >> + spin_lock(&clp->async_lock); >> + list_for_each_entry(copy, &clp->async_copies, copies) { >> + set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING); >> + kthread_stop(copy->copy_task); >> + nfs4_put_copy(copy); > > Those three lines could go in a common helper to be called here and from > nfsd4_offload_cancel(). Hi Bruce, I have a question. I’m testing how the code works in nfsd4_shutdown_copy. I have disabled canceling the copy (just returning ok) and so then the client can stop its copy and do the close and then I can issue an unmount. What I see is that the server returns “err_delay” until the copy finishes. That’s because in nfsd4_destroy_clientid() it calls mark_client_expired_locked() which checks the refcount on the client structure and since copy holds a reference server returns err_delay. Once the copy finished and decrements the reference, and nfsd4_destroy_clientid() gets past that then calling nfsd4_shutdown_copy() doesn’t find any copies. Should the logic of nfsd4_destroy_clientid() be changed to stop copies then instead of during destruction of the client structure? > > --b. > >> + } >> + spin_unlock(&clp->async_lock); >> +} >> + >> static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) >> { >> struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index bd76bd1..f9f2383 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -1965,6 +1965,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) >> release_openowner(oo); >> } >> nfsd4_return_all_client_layouts(clp); >> + nfsd4_shutdown_copy(clp); >> nfsd4_shutdown_callback(clp); >> if (clp->cl_cb_conn.cb_xprt) >> svc_xprt_put(clp->cl_cb_conn.cb_xprt); >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index 49709d1..131aefa 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -646,6 +646,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, >> extern int nfsd4_create_callback_queue(void); >> extern void nfsd4_destroy_callback_queue(void); >> extern void nfsd4_shutdown_callback(struct nfs4_client *); >> +extern void nfsd4_shutdown_copy(struct nfs4_client *clp); >> extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); >> extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, >> struct nfsd_net *nn); >> -- >> 1.8.3.1 > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 10, 2018 at 04:09:09PM -0400, Olga Kornievskaia wrote: > I have a question. I’m testing how the code works in > nfsd4_shutdown_copy. I have disabled canceling the copy (just > returning ok) and so then the client can stop its copy and do the > close and then I can issue an unmount. What I see is that the server > returns “err_delay” until the copy finishes. That’s because in > nfsd4_destroy_clientid() it calls mark_client_expired_locked() which > checks the refcount on the client structure and since copy holds a > reference server returns err_delay. Once the copy finished and > decrements the reference, and nfsd4_destroy_clientid() gets past that > then calling nfsd4_shutdown_copy() doesn’t find any copies. > > Should the logic of nfsd4_destroy_clientid() be changed to stop copies > then instead of during destruction of the client structure? Oh, good question, I don't know. Thinking about it.... DESTROY_CLIENTID doesn't throw away all the client's state for it, it's only meant to be called after the client has already cleaned up everything else. So: https://tools.ietf.org/html/rfc5661#section-18.50.3 If there are sessions (both idle and non-idle), opens, locks, delegations, layouts, and/or wants (Section 18.49) associated with the unexpired lease of the client ID, the server MUST return NFS4ERR_CLIENTID_BUSY. My feeling is that "ongoing copies" also belongs on that list. So the server behavior you're seeing sounds correct to me--the client should cancel any ongoing copies before calling DESTROY_CLIENTID. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 10, 2018 at 4:21 PM, J. Bruce Fields <bfields@redhat.com> wrote: > On Tue, Apr 10, 2018 at 04:09:09PM -0400, Olga Kornievskaia wrote: >> I have a question. I’m testing how the code works in >> nfsd4_shutdown_copy. I have disabled canceling the copy (just >> returning ok) and so then the client can stop its copy and do the >> close and then I can issue an unmount. What I see is that the server >> returns “err_delay” until the copy finishes. That’s because in >> nfsd4_destroy_clientid() it calls mark_client_expired_locked() which >> checks the refcount on the client structure and since copy holds a >> reference server returns err_delay. Once the copy finished and >> decrements the reference, and nfsd4_destroy_clientid() gets past that >> then calling nfsd4_shutdown_copy() doesn’t find any copies. >> >> Should the logic of nfsd4_destroy_clientid() be changed to stop copies >> then instead of during destruction of the client structure? > > Oh, good question, I don't know. Thinking about it.... > > DESTROY_CLIENTID doesn't throw away all the client's state for it, it's > only meant to be called after the client has already cleaned up > everything else. So: > > https://tools.ietf.org/html/rfc5661#section-18.50.3 > > If there are sessions (both idle and non-idle), opens, locks, > delegations, layouts, and/or wants (Section 18.49) associated > with the unexpired lease of the client ID, the server MUST > return NFS4ERR_CLIENTID_BUSY. > > My feeling is that "ongoing copies" also belongs on that list. > > So the server behavior you're seeing sounds correct to me--the client > should cancel any ongoing copies before calling DESTROY_CLIENTID. If the behavior of returning ERR_DELAY until the copy is done is correct one, then I don't think I need this patch at all. Since copy takes a reference on the nfs4_client structure, then in __destroy_client() where nfsd4_shutdown_copy() is called the list will always be empty. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 10, 2018 at 05:07:02PM -0400, Olga Kornievskaia wrote: > On Tue, Apr 10, 2018 at 4:21 PM, J. Bruce Fields <bfields@redhat.com> wrote: > > On Tue, Apr 10, 2018 at 04:09:09PM -0400, Olga Kornievskaia wrote: > >> I have a question. I’m testing how the code works in > >> nfsd4_shutdown_copy. I have disabled canceling the copy (just > >> returning ok) and so then the client can stop its copy and do the > >> close and then I can issue an unmount. What I see is that the server > >> returns “err_delay” until the copy finishes. That’s because in > >> nfsd4_destroy_clientid() it calls mark_client_expired_locked() which > >> checks the refcount on the client structure and since copy holds a > >> reference server returns err_delay. Once the copy finished and > >> decrements the reference, and nfsd4_destroy_clientid() gets past that > >> then calling nfsd4_shutdown_copy() doesn’t find any copies. > >> > >> Should the logic of nfsd4_destroy_clientid() be changed to stop copies > >> then instead of during destruction of the client structure? > > > > Oh, good question, I don't know. Thinking about it.... > > > > DESTROY_CLIENTID doesn't throw away all the client's state for it, it's > > only meant to be called after the client has already cleaned up > > everything else. So: > > > > https://tools.ietf.org/html/rfc5661#section-18.50.3 > > > > If there are sessions (both idle and non-idle), opens, locks, > > delegations, layouts, and/or wants (Section 18.49) associated > > with the unexpired lease of the client ID, the server MUST > > return NFS4ERR_CLIENTID_BUSY. > > > > My feeling is that "ongoing copies" also belongs on that list. > > > > So the server behavior you're seeing sounds correct to me--the client > > should cancel any ongoing copies before calling DESTROY_CLIENTID. > > If the behavior of returning ERR_DELAY until the copy is done is > correct one, then I don't think I need this patch at all. Since copy > takes a reference on the nfs4_client structure, then in > __destroy_client() where nfsd4_shutdown_copy() is called the list will > always be empty. Actually I guess it should be returning CLIENTID_BUSY. Maybe that's a preexisting bug. We do still need to be able to handle the case where the lease expires. I guess the copy needs to be forcibly canceled in that case. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 10, 2018 at 05:13:07PM -0400, J. Bruce Fields wrote: > On Tue, Apr 10, 2018 at 05:07:02PM -0400, Olga Kornievskaia wrote: > > On Tue, Apr 10, 2018 at 4:21 PM, J. Bruce Fields <bfields@redhat.com> wrote: > > > DESTROY_CLIENTID doesn't throw away all the client's state for it, it's > > > only meant to be called after the client has already cleaned up > > > everything else. So: > > > > > > https://tools.ietf.org/html/rfc5661#section-18.50.3 > > > > > > If there are sessions (both idle and non-idle), opens, locks, > > > delegations, layouts, and/or wants (Section 18.49) associated > > > with the unexpired lease of the client ID, the server MUST > > > return NFS4ERR_CLIENTID_BUSY. > > > > > > My feeling is that "ongoing copies" also belongs on that list. And come to think of it we should actually be adding that check to client_has_state()--it should return clientid_busy if there are any copies in progress. > > > So the server behavior you're seeing sounds correct to me--the client > > > should cancel any ongoing copies before calling DESTROY_CLIENTID. > > > > If the behavior of returning ERR_DELAY until the copy is done is > > correct one, then I don't think I need this patch at all. Since copy > > takes a reference on the nfs4_client structure, then in > > __destroy_client() where nfsd4_shutdown_copy() is called the list will > > always be empty. > > Actually I guess it should be returning CLIENTID_BUSY. Maybe that's a > preexisting bug. So the copy should be caught by the earlier client_has_state() check before it gets to the later mark_client_expired_locked(). And after reminding myself how this works.... We only hold references on clients temporarily such as while we're actually processing an RPC from a client. An elevated cl_refcount prevents the server from removing the client even after the lease expires, or after the client reboots and attempts to clear its old state with a new EXCHANGE_ID/CREATE_SESSION. I don't think that's what we want. Clients still need to renew their lease in the usual way, a long-running async copy doesn't keep the lease renewed automatically. So, the asynchronous copy shouldn't hold a reference on the client. The copy thread can still safely use the client while it's running, because it knows that anyone destroying the client will first cancel the copy and wait for the thread to die. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 11, 2018 at 1:33 PM, J. Bruce Fields <bfields@redhat.com> wrote: > On Tue, Apr 10, 2018 at 05:13:07PM -0400, J. Bruce Fields wrote: >> On Tue, Apr 10, 2018 at 05:07:02PM -0400, Olga Kornievskaia wrote: >> > On Tue, Apr 10, 2018 at 4:21 PM, J. Bruce Fields <bfields@redhat.com> wrote: >> > > DESTROY_CLIENTID doesn't throw away all the client's state for it, it's >> > > only meant to be called after the client has already cleaned up >> > > everything else. So: >> > > >> > > https://tools.ietf.org/html/rfc5661#section-18.50.3 >> > > >> > > If there are sessions (both idle and non-idle), opens, locks, >> > > delegations, layouts, and/or wants (Section 18.49) associated >> > > with the unexpired lease of the client ID, the server MUST >> > > return NFS4ERR_CLIENTID_BUSY. >> > > >> > > My feeling is that "ongoing copies" also belongs on that list. > > And come to think of it we should actually be adding that check to > client_has_state()--it should return clientid_busy if there are any > copies in progress. > >> > > So the server behavior you're seeing sounds correct to me--the client >> > > should cancel any ongoing copies before calling DESTROY_CLIENTID. >> > >> > If the behavior of returning ERR_DELAY until the copy is done is >> > correct one, then I don't think I need this patch at all. Since copy >> > takes a reference on the nfs4_client structure, then in >> > __destroy_client() where nfsd4_shutdown_copy() is called the list will >> > always be empty. >> >> Actually I guess it should be returning CLIENTID_BUSY. Maybe that's a >> preexisting bug. > > So the copy should be caught by the earlier client_has_state() check > before it gets to the later mark_client_expired_locked(). > > And after reminding myself how this works.... We only hold references > on clients temporarily such as while we're actually processing an RPC > from a client. > > An elevated cl_refcount prevents the server from removing the client > even after the lease expires, or after the client reboots and attempts > to clear its old state with a new EXCHANGE_ID/CREATE_SESSION. I don't > think that's what we want. Clients still need to renew their lease in > the usual way, a long-running async copy doesn't keep the lease renewed > automatically. > > So, the asynchronous copy shouldn't hold a reference on the client. > > The copy thread can still safely use the client while it's running, > because it knows that anyone destroying the client will first cancel the > copy and wait for the thread to die. Ok no reference on the client. I will add a check to client_has_state() to check for on-going copies and then server would return CLIENTID_BUSY. What was already in the patch was that during client shutdown it would stop copies. I have tested for when the server is expiring client's lease, it also shuts down any on-going copies. I think I'm ready for the next version submission... -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Apr 12, 2018 at 04:05:39PM -0400, Olga Kornievskaia wrote: > On Wed, Apr 11, 2018 at 1:33 PM, J. Bruce Fields <bfields@redhat.com> wrote: > > On Tue, Apr 10, 2018 at 05:13:07PM -0400, J. Bruce Fields wrote: > >> On Tue, Apr 10, 2018 at 05:07:02PM -0400, Olga Kornievskaia wrote: > >> > On Tue, Apr 10, 2018 at 4:21 PM, J. Bruce Fields <bfields@redhat.com> wrote: > >> > > DESTROY_CLIENTID doesn't throw away all the client's state for it, it's > >> > > only meant to be called after the client has already cleaned up > >> > > everything else. So: > >> > > > >> > > https://tools.ietf.org/html/rfc5661#section-18.50.3 > >> > > > >> > > If there are sessions (both idle and non-idle), opens, locks, > >> > > delegations, layouts, and/or wants (Section 18.49) associated > >> > > with the unexpired lease of the client ID, the server MUST > >> > > return NFS4ERR_CLIENTID_BUSY. > >> > > > >> > > My feeling is that "ongoing copies" also belongs on that list. > > > > And come to think of it we should actually be adding that check to > > client_has_state()--it should return clientid_busy if there are any > > copies in progress. > > > >> > > So the server behavior you're seeing sounds correct to me--the client > >> > > should cancel any ongoing copies before calling DESTROY_CLIENTID. > >> > > >> > If the behavior of returning ERR_DELAY until the copy is done is > >> > correct one, then I don't think I need this patch at all. Since copy > >> > takes a reference on the nfs4_client structure, then in > >> > __destroy_client() where nfsd4_shutdown_copy() is called the list will > >> > always be empty. > >> > >> Actually I guess it should be returning CLIENTID_BUSY. Maybe that's a > >> preexisting bug. > > > > So the copy should be caught by the earlier client_has_state() check > > before it gets to the later mark_client_expired_locked(). > > > > And after reminding myself how this works.... We only hold references > > on clients temporarily such as while we're actually processing an RPC > > from a client. > > > > An elevated cl_refcount prevents the server from removing the client > > even after the lease expires, or after the client reboots and attempts > > to clear its old state with a new EXCHANGE_ID/CREATE_SESSION. I don't > > think that's what we want. Clients still need to renew their lease in > > the usual way, a long-running async copy doesn't keep the lease renewed > > automatically. > > > > So, the asynchronous copy shouldn't hold a reference on the client. > > > > The copy thread can still safely use the client while it's running, > > because it knows that anyone destroying the client will first cancel the > > copy and wait for the thread to die. > > Ok no reference on the client. I will add a check to > client_has_state() to check for on-going copies and then server would > return CLIENTID_BUSY. What was already in the patch was that during > client shutdown it would stop copies. I have tested for when the > server is expiring client's lease, it also shuts down any on-going > copies. I think I'm ready for the next version submission... OK, great.--b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 57d0daa..54039e4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1096,6 +1096,19 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) kfree(copy); } +void nfsd4_shutdown_copy(struct nfs4_client *clp) +{ + struct nfsd4_copy *copy; + + spin_lock(&clp->async_lock); + list_for_each_entry(copy, &clp->async_copies, copies) { + set_tsk_thread_flag(copy->copy_task, TIF_SIGPENDING); + kthread_stop(copy->copy_task); + nfs4_put_copy(copy); + } + spin_unlock(&clp->async_lock); +} + static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bd76bd1..f9f2383 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1965,6 +1965,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) release_openowner(oo); } nfsd4_return_all_client_layouts(clp); + nfsd4_shutdown_copy(clp); nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 49709d1..131aefa 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -646,6 +646,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); +extern void nfsd4_shutdown_copy(struct nfs4_client *clp); extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn);
If client is shutting down and there are still async copies going on, then stop queued async copies. Signed-off-by: Olga Kornievskaia <kolga@netapp.com> --- fs/nfsd/nfs4proc.c | 13 +++++++++++++ fs/nfsd/nfs4state.c | 1 + fs/nfsd/state.h | 1 + 3 files changed, 15 insertions(+)