diff mbox series

[1/1] NFSD: fix management of pending async copies

Message ID 20241217192742.97306-1-okorniev@redhat.com (mailing list archive)
State Changes Requested
Headers show
Series [1/1] NFSD: fix management of pending async copies | expand

Commit Message

Olga Kornievskaia Dec. 17, 2024, 7:27 p.m. UTC
Consider async copy done once it's done processing the copy work.

Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live")
Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
---
 fs/nfsd/nfs4proc.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

Comments

Chuck Lever Dec. 17, 2024, 7:44 p.m. UTC | #1
On 12/17/24 2:27 PM, Olga Kornievskaia wrote:

Needs a problem statement. I suggest something like:

"Currently the pending_async_copies count is decremented just before
a struct nfsd4_copy is destroyed. But now that nfsd4_copy structures
stick around for 10 lease periods after the COPY itself has completed,
the pending_async_copies count stays high for a long time. This causes
NFSD to avoid the use of background copy even though the actual
background copy workload might no longer be running."


> Consider async copy done once it's done processing the copy work.

Doesn't nfsd4_stop_copy() need to decrement as well, if it finds that
the kthread is still running?


> Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live")
> Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
> ---
>   fs/nfsd/nfs4proc.c | 13 ++++++++-----
>   1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index f8a10f90bc7a..ad44ad49274f 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
>   {
>   	if (!refcount_dec_and_test(&copy->refcount))
>   		return;
> -	atomic_dec(&copy->cp_nn->pending_async_copies);
>   	kfree(copy->cp_src);
>   	kfree(copy);
>   }
> @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data)
>   	set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
>   	trace_nfsd_copy_async_done(copy);
>   	nfsd4_send_cb_offload(copy);
> +	atomic_dec(&copy->cp_nn->pending_async_copies);
>   	return 0;
>   }
>   
> @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   		/* Arbitrary cap on number of pending async copy operations */
>   		if (atomic_inc_return(&nn->pending_async_copies) >
>   				(int)rqstp->rq_pool->sp_nrthreads)
> -			goto out_err;
> +			goto out_dec_async_copy_err;
>   		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
>   		if (!async_copy->cp_src)
> -			goto out_err;
> +			goto out_dec_async_copy_err;
>   		if (!nfs4_init_copy_state(nn, copy))
> -			goto out_err;
> +			goto out_dec_async_copy_err;
>   		memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
>   			sizeof(result->cb_stateid));
>   		dup_copy_fields(copy, async_copy);
>   		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
>   				async_copy, "%s", "copy thread");
>   		if (IS_ERR(async_copy->copy_task))
> -			goto out_err;
> +			goto out_dec_async_copy_err;
>   		spin_lock(&async_copy->cp_clp->async_lock);
>   		list_add(&async_copy->copies,
>   				&async_copy->cp_clp->async_copies);
> @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	trace_nfsd_copy_done(copy, status);
>   	release_copy_files(copy);
>   	return status;
> +out_dec_async_copy_err:
> +	if (async_copy)
> +		atomic_dec(&nn->pending_async_copies);
>   out_err:
>   	if (nfsd4_ssc_is_inter(copy)) {
>   		/*
Olga Kornievskaia Dec. 17, 2024, 8:47 p.m. UTC | #2
On Tue, Dec 17, 2024 at 2:45 PM Chuck Lever <chuck.lever@oracle.com> wrote:
>
> On 12/17/24 2:27 PM, Olga Kornievskaia wrote:
>
> Needs a problem statement. I suggest something like:
>
> "Currently the pending_async_copies count is decremented just before
> a struct nfsd4_copy is destroyed. But now that nfsd4_copy structures
> stick around for 10 lease periods after the COPY itself has completed,
> the pending_async_copies count stays high for a long time. This causes
> NFSD to avoid the use of background copy even though the actual
> background copy workload might no longer be running."

Sure I can re-submit with this change in the comment.

> > Consider async copy done once it's done processing the copy work.
>
> Doesn't nfsd4_stop_copy() need to decrement as well, if it finds that
> the kthread is still running?

I thought so. So I tested it. kthread_stop() sends a signal to the
thread (thread needs to check kthread_should_stop() which we do) that
it wants to exit and then waits for its completion. That means the
copy thread runs its course and by doing so decrements the pending
count. Otherwise, I found out, we do it twice (and end up negative).

>
>
> > Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live")
> > Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
> > ---
> >   fs/nfsd/nfs4proc.c | 13 ++++++++-----
> >   1 file changed, 8 insertions(+), 5 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index f8a10f90bc7a..ad44ad49274f 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
> >   {
> >       if (!refcount_dec_and_test(&copy->refcount))
> >               return;
> > -     atomic_dec(&copy->cp_nn->pending_async_copies);
> >       kfree(copy->cp_src);
> >       kfree(copy);
> >   }
> > @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data)
> >       set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
> >       trace_nfsd_copy_async_done(copy);
> >       nfsd4_send_cb_offload(copy);
> > +     atomic_dec(&copy->cp_nn->pending_async_copies);
> >       return 0;
> >   }
> >
> > @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >               /* Arbitrary cap on number of pending async copy operations */
> >               if (atomic_inc_return(&nn->pending_async_copies) >
> >                               (int)rqstp->rq_pool->sp_nrthreads)
> > -                     goto out_err;
> > +                     goto out_dec_async_copy_err;
> >               async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
> >               if (!async_copy->cp_src)
> > -                     goto out_err;
> > +                     goto out_dec_async_copy_err;
> >               if (!nfs4_init_copy_state(nn, copy))
> > -                     goto out_err;
> > +                     goto out_dec_async_copy_err;
> >               memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
> >                       sizeof(result->cb_stateid));
> >               dup_copy_fields(copy, async_copy);
> >               async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
> >                               async_copy, "%s", "copy thread");
> >               if (IS_ERR(async_copy->copy_task))
> > -                     goto out_err;
> > +                     goto out_dec_async_copy_err;
> >               spin_lock(&async_copy->cp_clp->async_lock);
> >               list_add(&async_copy->copies,
> >                               &async_copy->cp_clp->async_copies);
> > @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >       trace_nfsd_copy_done(copy, status);
> >       release_copy_files(copy);
> >       return status;
> > +out_dec_async_copy_err:
> > +     if (async_copy)
> > +             atomic_dec(&nn->pending_async_copies);
> >   out_err:
> >       if (nfsd4_ssc_is_inter(copy)) {
> >               /*
>
>
> --
> Chuck Lever
>
Chuck Lever Dec. 17, 2024, 8:57 p.m. UTC | #3
On 12/17/24 3:47 PM, Olga Kornievskaia wrote:
> On Tue, Dec 17, 2024 at 2:45 PM Chuck Lever <chuck.lever@oracle.com> wrote:
>>
>> On 12/17/24 2:27 PM, Olga Kornievskaia wrote:
>>
>> Needs a problem statement. I suggest something like:
>>
>> "Currently the pending_async_copies count is decremented just before
>> a struct nfsd4_copy is destroyed. But now that nfsd4_copy structures
>> stick around for 10 lease periods after the COPY itself has completed,
>> the pending_async_copies count stays high for a long time. This causes
>> NFSD to avoid the use of background copy even though the actual
>> background copy workload might no longer be running."
> 
> Sure I can re-submit with this change in the comment.
> 
>>> Consider async copy done once it's done processing the copy work.
>>
>> Doesn't nfsd4_stop_copy() need to decrement as well, if it finds that
>> the kthread is still running?
> 
> I thought so. So I tested it. kthread_stop() sends a signal to the
> thread (thread needs to check kthread_should_stop() which we do) that
> it wants to exit and then waits for its completion. That means the
> copy thread runs its course and by doing so decrements the pending
> count. Otherwise, I found out, we do it twice (and end up negative).

Thanks, that sounds good. No change needed, then.

Send a v2, and I can plunk it into nfsd-fixes for broader testing.


>>> Fixes: aa0ebd21df9c ("NFSD: Add nfsd4_copy time-to-live")
>>> Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
>>> ---
>>>    fs/nfsd/nfs4proc.c | 13 ++++++++-----
>>>    1 file changed, 8 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>> index f8a10f90bc7a..ad44ad49274f 100644
>>> --- a/fs/nfsd/nfs4proc.c
>>> +++ b/fs/nfsd/nfs4proc.c
>>> @@ -1347,7 +1347,6 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
>>>    {
>>>        if (!refcount_dec_and_test(&copy->refcount))
>>>                return;
>>> -     atomic_dec(&copy->cp_nn->pending_async_copies);
>>>        kfree(copy->cp_src);
>>>        kfree(copy);
>>>    }
>>> @@ -1870,6 +1869,7 @@ static int nfsd4_do_async_copy(void *data)
>>>        set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
>>>        trace_nfsd_copy_async_done(copy);
>>>        nfsd4_send_cb_offload(copy);
>>> +     atomic_dec(&copy->cp_nn->pending_async_copies);
>>>        return 0;
>>>    }
>>>
>>> @@ -1927,19 +1927,19 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>                /* Arbitrary cap on number of pending async copy operations */
>>>                if (atomic_inc_return(&nn->pending_async_copies) >
>>>                                (int)rqstp->rq_pool->sp_nrthreads)
>>> -                     goto out_err;
>>> +                     goto out_dec_async_copy_err;
>>>                async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
>>>                if (!async_copy->cp_src)
>>> -                     goto out_err;
>>> +                     goto out_dec_async_copy_err;
>>>                if (!nfs4_init_copy_state(nn, copy))
>>> -                     goto out_err;
>>> +                     goto out_dec_async_copy_err;
>>>                memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
>>>                        sizeof(result->cb_stateid));
>>>                dup_copy_fields(copy, async_copy);
>>>                async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
>>>                                async_copy, "%s", "copy thread");
>>>                if (IS_ERR(async_copy->copy_task))
>>> -                     goto out_err;
>>> +                     goto out_dec_async_copy_err;
>>>                spin_lock(&async_copy->cp_clp->async_lock);
>>>                list_add(&async_copy->copies,
>>>                                &async_copy->cp_clp->async_copies);
>>> @@ -1954,6 +1954,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>        trace_nfsd_copy_done(copy, status);
>>>        release_copy_files(copy);
>>>        return status;
>>> +out_dec_async_copy_err:
>>> +     if (async_copy)
>>> +             atomic_dec(&nn->pending_async_copies);
>>>    out_err:
>>>        if (nfsd4_ssc_is_inter(copy)) {
>>>                /*
>>
>>
>> --
>> Chuck Lever
>>
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f8a10f90bc7a..ad44ad49274f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1347,7 +1347,6 @@  static void nfs4_put_copy(struct nfsd4_copy *copy)
 {
 	if (!refcount_dec_and_test(&copy->refcount))
 		return;
-	atomic_dec(&copy->cp_nn->pending_async_copies);
 	kfree(copy->cp_src);
 	kfree(copy);
 }
@@ -1870,6 +1869,7 @@  static int nfsd4_do_async_copy(void *data)
 	set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
 	trace_nfsd_copy_async_done(copy);
 	nfsd4_send_cb_offload(copy);
+	atomic_dec(&copy->cp_nn->pending_async_copies);
 	return 0;
 }
 
@@ -1927,19 +1927,19 @@  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		/* Arbitrary cap on number of pending async copy operations */
 		if (atomic_inc_return(&nn->pending_async_copies) >
 				(int)rqstp->rq_pool->sp_nrthreads)
-			goto out_err;
+			goto out_dec_async_copy_err;
 		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
 		if (!async_copy->cp_src)
-			goto out_err;
+			goto out_dec_async_copy_err;
 		if (!nfs4_init_copy_state(nn, copy))
-			goto out_err;
+			goto out_dec_async_copy_err;
 		memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
 			sizeof(result->cb_stateid));
 		dup_copy_fields(copy, async_copy);
 		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
 				async_copy, "%s", "copy thread");
 		if (IS_ERR(async_copy->copy_task))
-			goto out_err;
+			goto out_dec_async_copy_err;
 		spin_lock(&async_copy->cp_clp->async_lock);
 		list_add(&async_copy->copies,
 				&async_copy->cp_clp->async_copies);
@@ -1954,6 +1954,9 @@  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	trace_nfsd_copy_done(copy, status);
 	release_copy_files(copy);
 	return status;
+out_dec_async_copy_err:
+	if (async_copy)
+		atomic_dec(&nn->pending_async_copies);
 out_err:
 	if (nfsd4_ssc_is_inter(copy)) {
 		/*