Message ID | 20241217192742.97306-1-okorniev@redhat.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | [1/1] NFSD: fix management of pending async copies | expand |
On 12/17/24 2:27 PM, Olga Kornievskaia wrote: Needs a problem statement. I suggest something like: "Currently the pending_async_copies count is decremented just before a struct nfsd4_copy is destroyed. But now that nfsd4_copy structures stick around for 10 lease periods after the COPY itself has completed, the pending_async_copies count stays high for a long time. This causes NFSD to avoid the use of background copy even though the actual background copy workload might no longer be running." > Consider async copy done once it's done processing the copy work. Doesn't nfsd4_stop_copy() need to decrement as well, if it finds that the kthread is still running? > Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live") > Signed-off-by: Olga Kornievskaia <okorniev@redhat.com> > --- > fs/nfsd/nfs4proc.c | 13 ++++++++----- > 1 file changed, 8 insertions(+), 5 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index f8a10f90bc7a..ad44ad49274f 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) > { > if (!refcount_dec_and_test(©->refcount)) > return; > - atomic_dec(©->cp_nn->pending_async_copies); > kfree(copy->cp_src); > kfree(copy); > } > @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data) > set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); > trace_nfsd_copy_async_done(copy); > nfsd4_send_cb_offload(copy); > + atomic_dec(©->cp_nn->pending_async_copies); > return 0; > } > > @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* Arbitrary cap on number of pending async copy operations */ > if (atomic_inc_return(&nn->pending_async_copies) > > (int)rqstp->rq_pool->sp_nrthreads) > - goto out_err; > + goto out_dec_async_copy_err; > async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); > if (!async_copy->cp_src) > - goto out_err; > + goto out_dec_async_copy_err; > if (!nfs4_init_copy_state(nn, copy)) > - goto out_err; > + goto out_dec_async_copy_err; > memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, > sizeof(result->cb_stateid)); > dup_copy_fields(copy, async_copy); > async_copy->copy_task = kthread_create(nfsd4_do_async_copy, > async_copy, "%s", "copy thread"); > if (IS_ERR(async_copy->copy_task)) > - goto out_err; > + goto out_dec_async_copy_err; > spin_lock(&async_copy->cp_clp->async_lock); > list_add(&async_copy->copies, > &async_copy->cp_clp->async_copies); > @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > trace_nfsd_copy_done(copy, status); > release_copy_files(copy); > return status; > +out_dec_async_copy_err: > + if (async_copy) > + atomic_dec(&nn->pending_async_copies); > out_err: > if (nfsd4_ssc_is_inter(copy)) { > /*
On Tue, Dec 17, 2024 at 2:45 PM Chuck Lever <chuck.lever@oracle.com> wrote: > > On 12/17/24 2:27 PM, Olga Kornievskaia wrote: > > Needs a problem statement. I suggest something like: > > "Currently the pending_async_copies count is decremented just before > a struct nfsd4_copy is destroyed. But now that nfsd4_copy structures > stick around for 10 lease periods after the COPY itself has completed, > the pending_async_copies count stays high for a long time. This causes > NFSD to avoid the use of background copy even though the actual > background copy workload might no longer be running." Sure I can re-submit with this change in the comment. > > Consider async copy done once it's done processing the copy work. > > Doesn't nfsd4_stop_copy() need to decrement as well, if it finds that > the kthread is still running? I thought so. So I tested it. kthread_stop() sends a signal to the thread (thread needs to check kthread_should_stop() which we do) that it wants to exit and then waits for its completion. That means the copy thread runs its course and by doing so decrements the pending count. Otherwise, I found out, we do it twice (and end up negative). > > > > Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live") > > Signed-off-by: Olga Kornievskaia <okorniev@redhat.com> > > --- > > fs/nfsd/nfs4proc.c | 13 ++++++++----- > > 1 file changed, 8 insertions(+), 5 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index f8a10f90bc7a..ad44ad49274f 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) > > { > > if (!refcount_dec_and_test(©->refcount)) > > return; > > - atomic_dec(©->cp_nn->pending_async_copies); > > kfree(copy->cp_src); > > kfree(copy); > > } > > @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data) > > set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); > > trace_nfsd_copy_async_done(copy); > > nfsd4_send_cb_offload(copy); > > + atomic_dec(©->cp_nn->pending_async_copies); > > return 0; > > } > > > > @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > /* Arbitrary cap on number of pending async copy operations */ > > if (atomic_inc_return(&nn->pending_async_copies) > > > (int)rqstp->rq_pool->sp_nrthreads) > > - goto out_err; > > + goto out_dec_async_copy_err; > > async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); > > if (!async_copy->cp_src) > > - goto out_err; > > + goto out_dec_async_copy_err; > > if (!nfs4_init_copy_state(nn, copy)) > > - goto out_err; > > + goto out_dec_async_copy_err; > > memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, > > sizeof(result->cb_stateid)); > > dup_copy_fields(copy, async_copy); > > async_copy->copy_task = kthread_create(nfsd4_do_async_copy, > > async_copy, "%s", "copy thread"); > > if (IS_ERR(async_copy->copy_task)) > > - goto out_err; > > + goto out_dec_async_copy_err; > > spin_lock(&async_copy->cp_clp->async_lock); > > list_add(&async_copy->copies, > > &async_copy->cp_clp->async_copies); > > @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > trace_nfsd_copy_done(copy, status); > > release_copy_files(copy); > > return status; > > +out_dec_async_copy_err: > > + if (async_copy) > > + atomic_dec(&nn->pending_async_copies); > > out_err: > > if (nfsd4_ssc_is_inter(copy)) { > > /* > > > -- > Chuck Lever >
On 12/17/24 3:47 PM, Olga Kornievskaia wrote: > On Tue, Dec 17, 2024 at 2:45 PM Chuck Lever <chuck.lever@oracle.com> wrote: >> >> On 12/17/24 2:27 PM, Olga Kornievskaia wrote: >> >> Needs a problem statement. I suggest something like: >> >> "Currently the pending_async_copies count is decremented just before >> a struct nfsd4_copy is destroyed. But now that nfsd4_copy structures >> stick around for 10 lease periods after the COPY itself has completed, >> the pending_async_copies count stays high for a long time. This causes >> NFSD to avoid the use of background copy even though the actual >> background copy workload might no longer be running." > > Sure I can re-submit with this change in the comment. > >>> Consider async copy done once it's done processing the copy work. >> >> Doesn't nfsd4_stop_copy() need to decrement as well, if it finds that >> the kthread is still running? > > I thought so. So I tested it. kthread_stop() sends a signal to the > thread (thread needs to check kthread_should_stop() which we do) that > it wants to exit and then waits for its completion. That means the > copy thread runs its course and by doing so decrements the pending > count. Otherwise, I found out, we do it twice (and end up negative). Thanks, that sounds good. No change needed, then. Send a v2, and I can plunk it into nfsd-fixes for broader testing. >>> Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live") >>> Signed-off-by: Olga Kornievskaia <okorniev@redhat.com> >>> --- >>> fs/nfsd/nfs4proc.c | 13 ++++++++----- >>> 1 file changed, 8 insertions(+), 5 deletions(-) >>> >>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >>> index f8a10f90bc7a..ad44ad49274f 100644 >>> --- a/fs/nfsd/nfs4proc.c >>> +++ b/fs/nfsd/nfs4proc.c >>> @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) >>> { >>> if (!refcount_dec_and_test(©->refcount)) >>> return; >>> - atomic_dec(©->cp_nn->pending_async_copies); >>> kfree(copy->cp_src); >>> kfree(copy); >>> } >>> @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data) >>> set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); >>> trace_nfsd_copy_async_done(copy); >>> nfsd4_send_cb_offload(copy); >>> + atomic_dec(©->cp_nn->pending_async_copies); >>> return 0; >>> } >>> >>> @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>> /* Arbitrary cap on number of pending async copy operations */ >>> if (atomic_inc_return(&nn->pending_async_copies) > >>> (int)rqstp->rq_pool->sp_nrthreads) >>> - goto out_err; >>> + goto out_dec_async_copy_err; >>> async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); >>> if (!async_copy->cp_src) >>> - goto out_err; >>> + goto out_dec_async_copy_err; >>> if (!nfs4_init_copy_state(nn, copy)) >>> - goto out_err; >>> + goto out_dec_async_copy_err; >>> memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, >>> sizeof(result->cb_stateid)); >>> dup_copy_fields(copy, async_copy); >>> async_copy->copy_task = kthread_create(nfsd4_do_async_copy, >>> async_copy, "%s", "copy thread"); >>> if (IS_ERR(async_copy->copy_task)) >>> - goto out_err; >>> + goto out_dec_async_copy_err; >>> spin_lock(&async_copy->cp_clp->async_lock); >>> list_add(&async_copy->copies, >>> &async_copy->cp_clp->async_copies); >>> @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>> trace_nfsd_copy_done(copy, status); >>> release_copy_files(copy); >>> return status; >>> +out_dec_async_copy_err: >>> + if (async_copy) >>> + atomic_dec(&nn->pending_async_copies); >>> out_err: >>> if (nfsd4_ssc_is_inter(copy)) { >>> /* >> >> >> -- >> Chuck Lever >>
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f8a10f90bc7a..ad44ad49274f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) return; - atomic_dec(©->cp_nn->pending_async_copies); kfree(copy->cp_src); kfree(copy); } @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data) set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); trace_nfsd_copy_async_done(copy); nfsd4_send_cb_offload(copy); + atomic_dec(©->cp_nn->pending_async_copies); return 0; } @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Arbitrary cap on number of pending async copy operations */ if (atomic_inc_return(&nn->pending_async_copies) > (int)rqstp->rq_pool->sp_nrthreads) - goto out_err; + goto out_dec_async_copy_err; async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); if (!async_copy->cp_src) - goto out_err; + goto out_dec_async_copy_err; if (!nfs4_init_copy_state(nn, copy)) - goto out_err; + goto out_dec_async_copy_err; memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, sizeof(result->cb_stateid)); dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) - goto out_err; + goto out_dec_async_copy_err; spin_lock(&async_copy->cp_clp->async_lock); list_add(&async_copy->copies, &async_copy->cp_clp->async_copies); @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, trace_nfsd_copy_done(copy, status); release_copy_files(copy); return status; +out_dec_async_copy_err: + if (async_copy) + atomic_dec(&nn->pending_async_copies); out_err: if (nfsd4_ssc_is_inter(copy)) { /*
Consider async copy done once it's done processing the copy work. Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live") Signed-off-by: Olga Kornievskaia <okorniev@redhat.com> --- fs/nfsd/nfs4proc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)