diff mbox series

[1/1] NFSD: fix problems with cleanup on errors in nfsd4_copy

Message ID 1674538340-12882-1-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series [1/1] NFSD: fix problems with cleanup on errors in nfsd4_copy | expand

Commit Message

Dai Ngo Jan. 24, 2023, 5:32 a.m. UTC
When nfsd4_copy fails to allocate memory for async_copy->cp_src, or
nfs4_init_copy_state fails, it calls cleanup_async_copy to do the
cleanup for the async_copy which causes page fault since async_copy
is not yet initialized.

This patche sets async_copy to NULL to skip cleanup_async_copy
if async_copy is not yet initialized.

Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy")
Fixes: 87689df69491 ("NFSD: Shrink size of struct nfsd4_copy")
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4proc.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

Comments

Jeff Layton Jan. 24, 2023, 11:45 a.m. UTC | #1
On Mon, 2023-01-23 at 21:32 -0800, Dai Ngo wrote:
> When nfsd4_copy fails to allocate memory for async_copy->cp_src, or
> nfs4_init_copy_state fails, it calls cleanup_async_copy to do the
> cleanup for the async_copy which causes page fault since async_copy
> is not yet initialized.
> 
> This patche sets async_copy to NULL to skip cleanup_async_copy
> if async_copy is not yet initialized.
> 
> Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy")
> Fixes: 87689df69491 ("NFSD: Shrink size of struct nfsd4_copy")
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c | 13 ++++++++++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 3b73e4d342bf..b4e7e18e1761 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1688,7 +1688,8 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
>  	if (!nfsd4_ssc_is_inter(copy))
>  		nfsd_file_put(copy->nf_src);
>  	spin_lock(&copy->cp_clp->async_lock);
> -	list_del(&copy->copies);
> +	if (!list_empty(&copy->copies))
> +		list_del(&copy->copies);
>  	spin_unlock(&copy->cp_clp->async_lock);
>  	nfs4_put_copy(copy);
>  }
> @@ -1789,9 +1790,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			goto out_err;
>  		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
>  		if (!async_copy->cp_src)
> +			goto no_mem;
> +		if (!nfs4_init_copy_state(nn, copy)) {
> +			kfree(async_copy->cp_src);
> +no_mem:
> +			kfree(async_copy);
> +			async_copy = NULL;

This seems pretty fragile and the result begins to resemble spaghetti. I
think it'd be cleaner to initialize the list_head and refcount before
you do the allocation of cp_src. Then you can just call
cleanup_async_copy no matter where it fails.

Bear in mind that these are failure codepaths, so we don't need to
optimize for performance here.



>  			goto out_err;
> -		if (!nfs4_init_copy_state(nn, copy))
> -			goto out_err;
> +		}
> +		INIT_LIST_HEAD(&async_copy->copies);
>  		refcount_set(&async_copy->refcount, 1);
>  		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
>  			sizeof(copy->cp_res.cb_stateid));
Dai Ngo Jan. 24, 2023, 8:10 p.m. UTC | #2
On 1/24/23 3:45 AM, Jeff Layton wrote:
> On Mon, 2023-01-23 at 21:32 -0800, Dai Ngo wrote:
>> When nfsd4_copy fails to allocate memory for async_copy->cp_src, or
>> nfs4_init_copy_state fails, it calls cleanup_async_copy to do the
>> cleanup for the async_copy which causes page fault since async_copy
>> is not yet initialized.
>>
>> This patche sets async_copy to NULL to skip cleanup_async_copy
>> if async_copy is not yet initialized.
>>
>> Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy")
>> Fixes: 87689df69491 ("NFSD: Shrink size of struct nfsd4_copy")
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4proc.c | 13 ++++++++++---
>>   1 file changed, 10 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index 3b73e4d342bf..b4e7e18e1761 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -1688,7 +1688,8 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
>>   	if (!nfsd4_ssc_is_inter(copy))
>>   		nfsd_file_put(copy->nf_src);
>>   	spin_lock(&copy->cp_clp->async_lock);
>> -	list_del(&copy->copies);
>> +	if (!list_empty(&copy->copies))
>> +		list_del(&copy->copies);
>>   	spin_unlock(&copy->cp_clp->async_lock);
>>   	nfs4_put_copy(copy);
>>   }
>> @@ -1789,9 +1790,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>   			goto out_err;
>>   		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
>>   		if (!async_copy->cp_src)
>> +			goto no_mem;
>> +		if (!nfs4_init_copy_state(nn, copy)) {
>> +			kfree(async_copy->cp_src);
>> +no_mem:
>> +			kfree(async_copy);
>> +			async_copy = NULL;
> This seems pretty fragile and the result begins to resemble spaghetti. I
> think it'd be cleaner to initialize the list_head and refcount before
> you do the allocation of cp_src. Then you can just call
> cleanup_async_copy no matter where it fails.

If we initialize the list_head and refcount before doing the allocation
of cp_src, we still can not call cleanup_async_copy if the allocation of
cp_src fails or nfs4_init_copy_state fails since:

. dst->cp_stateid is not initialized
. dst->nf_dst has not been incremented
. dst->ss_nsui is not set

The fields above are initialized by dup_copy_fields and we can not call
dup_copy_fields if allocation of cp_src fails or nfs4_init_copy_state
fails.

-Dai

>
> Bear in mind that these are failure codepaths, so we don't need to
> optimize for performance here.
>
>
>
>>   			goto out_err;
>> -		if (!nfs4_init_copy_state(nn, copy))
>> -			goto out_err;
>> +		}
>> +		INIT_LIST_HEAD(&async_copy->copies);
>>   		refcount_set(&async_copy->refcount, 1);
>>   		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
>>   			sizeof(copy->cp_res.cb_stateid));
Jeff Layton Jan. 24, 2023, 8:14 p.m. UTC | #3
On Tue, 2023-01-24 at 12:10 -0800, dai.ngo@oracle.com wrote:
> On 1/24/23 3:45 AM, Jeff Layton wrote:
> > On Mon, 2023-01-23 at 21:32 -0800, Dai Ngo wrote:
> > > When nfsd4_copy fails to allocate memory for async_copy->cp_src, or
> > > nfs4_init_copy_state fails, it calls cleanup_async_copy to do the
> > > cleanup for the async_copy which causes page fault since async_copy
> > > is not yet initialized.
> > > 
> > > This patche sets async_copy to NULL to skip cleanup_async_copy
> > > if async_copy is not yet initialized.
> > > 
> > > Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy")
> > > Fixes: 87689df69491 ("NFSD: Shrink size of struct nfsd4_copy")
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > ---
> > >   fs/nfsd/nfs4proc.c | 13 ++++++++++---
> > >   1 file changed, 10 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > index 3b73e4d342bf..b4e7e18e1761 100644
> > > --- a/fs/nfsd/nfs4proc.c
> > > +++ b/fs/nfsd/nfs4proc.c
> > > @@ -1688,7 +1688,8 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
> > >   	if (!nfsd4_ssc_is_inter(copy))
> > >   		nfsd_file_put(copy->nf_src);
> > >   	spin_lock(&copy->cp_clp->async_lock);
> > > -	list_del(&copy->copies);
> > > +	if (!list_empty(&copy->copies))
> > > +		list_del(&copy->copies);
> > >   	spin_unlock(&copy->cp_clp->async_lock);
> > >   	nfs4_put_copy(copy);
> > >   }
> > > @@ -1789,9 +1790,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >   			goto out_err;
> > >   		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
> > >   		if (!async_copy->cp_src)
> > > +			goto no_mem;
> > > +		if (!nfs4_init_copy_state(nn, copy)) {
> > > +			kfree(async_copy->cp_src);
> > > +no_mem:
> > > +			kfree(async_copy);
> > > +			async_copy = NULL;
> > This seems pretty fragile and the result begins to resemble spaghetti. I
> > think it'd be cleaner to initialize the list_head and refcount before
> > you do the allocation of cp_src. Then you can just call
> > cleanup_async_copy no matter where it fails.
> 
> If we initialize the list_head and refcount before doing the allocation
> of cp_src, we still can not call cleanup_async_copy if the allocation of
> cp_src fails or nfs4_init_copy_state fails since:
> 
> . dst->cp_stateid is not initialized
> . dst->nf_dst has not been incremented
> . dst->ss_nsui is not set
> 
> The fields above are initialized by dup_copy_fields and we can not call
> dup_copy_fields if allocation of cp_src fails or nfs4_init_copy_state
> fails.
> 
> 

That's what I meant by "fragile". It would be nice if the structure were
properly initialized after allocation, so we didn't have to call *just*
the right teardown procedure. It's slightly more work to do it that way,
but I doubt that would show up in any benchmarks.

I worry that later changes to this code might introduce subtle bugs
because of these fields not being fully initialized. This code is very
complex, and I think some defensive coding is warranted here.


> > 
> > Bear in mind that these are failure codepaths, so we don't need to
> > optimize for performance here.
> > 
> > 
> > 
> > >   			goto out_err;
> > > -		if (!nfs4_init_copy_state(nn, copy))
> > > -			goto out_err;
> > > +		}
> > > +		INIT_LIST_HEAD(&async_copy->copies);
> > >   		refcount_set(&async_copy->refcount, 1);
> > >   		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
> > >   			sizeof(copy->cp_res.cb_stateid));
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3b73e4d342bf..b4e7e18e1761 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1688,7 +1688,8 @@  static void cleanup_async_copy(struct nfsd4_copy *copy)
 	if (!nfsd4_ssc_is_inter(copy))
 		nfsd_file_put(copy->nf_src);
 	spin_lock(&copy->cp_clp->async_lock);
-	list_del(&copy->copies);
+	if (!list_empty(&copy->copies))
+		list_del(&copy->copies);
 	spin_unlock(&copy->cp_clp->async_lock);
 	nfs4_put_copy(copy);
 }
@@ -1789,9 +1790,15 @@  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out_err;
 		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
 		if (!async_copy->cp_src)
+			goto no_mem;
+		if (!nfs4_init_copy_state(nn, copy)) {
+			kfree(async_copy->cp_src);
+no_mem:
+			kfree(async_copy);
+			async_copy = NULL;
 			goto out_err;
-		if (!nfs4_init_copy_state(nn, copy))
-			goto out_err;
+		}
+		INIT_LIST_HEAD(&async_copy->copies);
 		refcount_set(&async_copy->refcount, 1);
 		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
 			sizeof(copy->cp_res.cb_stateid));