diff mbox series

nfsd: call op_release, even when op_func returns an error

Message ID 20230327102137.15412-1-jlayton@kernel.org (mailing list archive)
State New, archived
Headers show
Series nfsd: call op_release, even when op_func returns an error | expand

Commit Message

Jeff Layton March 27, 2023, 10:21 a.m. UTC
For ops with "trivial" replies, nfsd4_encode_operation will shortcut
most of the encoding work and skip to just marshalling up the status.
One of the things it skips is calling op_release. This could cause a
memory leak in the layoutget codepath if there is an error at an
inopportune time.

Have the compound processing engine always call op_release, even when
op_func sets an error in op->status. With this change, we also need
nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
on error to avoid a double free.

Reported-by: Zhi Li <yieli@redhat.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/blocklayout.c |  1 +
 fs/nfsd/nfs4xdr.c     | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

Comments

Chuck Lever March 27, 2023, 1:14 p.m. UTC | #1
> On Mar 27, 2023, at 6:21 AM, Jeff Layton <jlayton@kernel.org> wrote:
> 
> For ops with "trivial" replies, nfsd4_encode_operation will shortcut
> most of the encoding work and skip to just marshalling up the status.
> One of the things it skips is calling op_release. This could cause a
> memory leak in the layoutget codepath if there is an error at an
> inopportune time.
> 
> Have the compound processing engine always call op_release, even when
> op_func sets an error in op->status. With this change, we also need
> nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
> on error to avoid a double free.
> 
> Reported-by: Zhi Li <yieli@redhat.com>
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
> Signed-off-by: Jeff Layton <jlayton@kernel.org>

Thanks, Jeff.

May I add: Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops")?


> ---
> fs/nfsd/blocklayout.c |  1 +
> fs/nfsd/nfs4xdr.c     | 13 +++++++------
> 2 files changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 04697f8dc37d..01d7fd108cf3 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
> 
> out_free_dev:
> 	kfree(dev);
> +	gdp->gd_device = NULL;
> 	return ret;
> }
> 
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index e12e5a4ad502..6b675fbdabd0 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> 	p = xdr_reserve_space(xdr, 8);
> 	if (!p) {
> 		WARN_ON_ONCE(1);
> -		return;
> +		goto release;
> 	}
> 	*p++ = cpu_to_be32(op->opnum);
> 	post_err_offset = xdr->buf->len;
> @@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> 	op->status = encoder(resp, op->status, &op->u);
> 	if (op->status)
> 		trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
> -	if (opdesc && opdesc->op_release)
> -		opdesc->op_release(&op->u);
> 	xdr_commit_encode(xdr);
> 
> 	/* nfsd4_check_resp_size guarantees enough room for error status */
> @@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> 	}
> status:
> 	*p = op->status;
> +release:
> +	if (opdesc && opdesc->op_release)
> +		opdesc->op_release(&op->u);
> }
> 
> -/* 
> - * Encode the reply stored in the stateowner reply cache 
> - * 
> +/*
> + * Encode the reply stored in the stateowner reply cache
> + *
>  * XDR note: do not encode rp->rp_buflen: the buffer contains the
>  * previously sent already encoded operation.
>  */
> -- 
> 2.39.2
> 

--
Chuck Lever
Jeff Layton March 27, 2023, 2:32 p.m. UTC | #2
On Mon, 2023-03-27 at 13:14 +0000, Chuck Lever III wrote:
> 
> > On Mar 27, 2023, at 6:21 AM, Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > For ops with "trivial" replies, nfsd4_encode_operation will shortcut
> > most of the encoding work and skip to just marshalling up the status.
> > One of the things it skips is calling op_release. This could cause a
> > memory leak in the layoutget codepath if there is an error at an
> > inopportune time.
> > 
> > Have the compound processing engine always call op_release, even when
> > op_func sets an error in op->status. With this change, we also need
> > nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
> > on error to avoid a double free.
> > 
> > Reported-by: Zhi Li <yieli@redhat.com>
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> 
> Thanks, Jeff.
> 
> May I add: Fixes: 34b1744c91cc ("nfsd4: define ->op_release for
> compound ops") ?
> 
> 

Sure. It does look like the leaks stretch back at least that far.
 
> > ---
> > fs/nfsd/blocklayout.c |  1 +
> > fs/nfsd/nfs4xdr.c     | 13 +++++++------
> > 2 files changed, 8 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > index 04697f8dc37d..01d7fd108cf3 100644
> > --- a/fs/nfsd/blocklayout.c
> > +++ b/fs/nfsd/blocklayout.c
> > @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
> > 
> > out_free_dev:
> > 	kfree(dev);
> > +	gdp->gd_device = NULL;
> > 	return ret;
> > }
> > 
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index e12e5a4ad502..6b675fbdabd0 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > 	p = xdr_reserve_space(xdr, 8);
> > 	if (!p) {
> > 		WARN_ON_ONCE(1);
> > -		return;
> > +		goto release;
> > 	}
> > 	*p++ = cpu_to_be32(op->opnum);
> > 	post_err_offset = xdr->buf->len;
> > @@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > 	op->status = encoder(resp, op->status, &op->u);
> > 	if (op->status)
> > 		trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
> > -	if (opdesc && opdesc->op_release)
> > -		opdesc->op_release(&op->u);
> > 	xdr_commit_encode(xdr);
> > 
> > 	/* nfsd4_check_resp_size guarantees enough room for error status */
> > @@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > 	}
> > status:
> > 	*p = op->status;
> > +release:
> > +	if (opdesc && opdesc->op_release)
> > +		opdesc->op_release(&op->u);
> > }
> > 
> > -/* 
> > - * Encode the reply stored in the stateowner reply cache 
> > - * 
> > +/*
> > + * Encode the reply stored in the stateowner reply cache
> > + *
> >  * XDR note: do not encode rp->rp_buflen: the buffer contains the
> >  * previously sent already encoded operation.
> >  */
> > -- 
> > 2.39.2
> > 
> 
> --
> Chuck Lever
> 
>
Jeff Layton March 30, 2023, 6:15 p.m. UTC | #3
On Mon, 2023-03-27 at 13:14 +0000, Chuck Lever III wrote:
> 
> > On Mar 27, 2023, at 6:21 AM, Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > For ops with "trivial" replies, nfsd4_encode_operation will shortcut
> > most of the encoding work and skip to just marshalling up the status.
> > One of the things it skips is calling op_release. This could cause a
> > memory leak in the layoutget codepath if there is an error at an
> > inopportune time.
> > 
> > Have the compound processing engine always call op_release, even when
> > op_func sets an error in op->status. With this change, we also need
> > nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
> > on error to avoid a double free.
> > 
> > Reported-by: Zhi Li <yieli@redhat.com>
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> 
> Thanks, Jeff.
> 
> May I add: Fixes: 34b1744c91cc ("nfsd4: define ->op_release for
> compound ops") ?
> 

I've seen some problems with this patch in testing, and I have a fix
forthcoming (once I finish testing it).

The root cause is the OPDESC() function, which can walk off the end of
the nfsd4_ops array when passed a large value (like OP_ILLEGAL). I think
we'll want to fix that to do something saner before merging this
patch.

> 
> > ---
> > fs/nfsd/blocklayout.c |  1 +
> > fs/nfsd/nfs4xdr.c     | 13 +++++++------
> > 2 files changed, 8 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > index 04697f8dc37d..01d7fd108cf3 100644
> > --- a/fs/nfsd/blocklayout.c
> > +++ b/fs/nfsd/blocklayout.c
> > @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
> > 
> > out_free_dev:
> > 	kfree(dev);
> > +	gdp->gd_device = NULL;
> > 	return ret;
> > }
> > 
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index e12e5a4ad502..6b675fbdabd0 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > 	p = xdr_reserve_space(xdr, 8);
> > 	if (!p) {
> > 		WARN_ON_ONCE(1);
> > -		return;
> > +		goto release;
> > 	}
> > 	*p++ = cpu_to_be32(op->opnum);
> > 	post_err_offset = xdr->buf->len;
> > @@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > 	op->status = encoder(resp, op->status, &op->u);
> > 	if (op->status)
> > 		trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
> > -	if (opdesc && opdesc->op_release)
> > -		opdesc->op_release(&op->u);
> > 	xdr_commit_encode(xdr);
> > 
> > 	/* nfsd4_check_resp_size guarantees enough room for error status */
> > @@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > 	}
> > status:
> > 	*p = op->status;
> > +release:
> > +	if (opdesc && opdesc->op_release)
> > +		opdesc->op_release(&op->u);
> > }
> > 
> > -/* 
> > - * Encode the reply stored in the stateowner reply cache 
> > - * 
> > +/*
> > + * Encode the reply stored in the stateowner reply cache
> > + *
> >  * XDR note: do not encode rp->rp_buflen: the buffer contains the
> >  * previously sent already encoded operation.
> >  */
> > -- 
> > 2.39.2
> > 
> 
> --
> Chuck Lever
> 
>
diff mbox series

Patch

diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 04697f8dc37d..01d7fd108cf3 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -297,6 +297,7 @@  nfsd4_block_get_device_info_scsi(struct super_block *sb,
 
 out_free_dev:
 	kfree(dev);
+	gdp->gd_device = NULL;
 	return ret;
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e12e5a4ad502..6b675fbdabd0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5402,7 +5402,7 @@  nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	p = xdr_reserve_space(xdr, 8);
 	if (!p) {
 		WARN_ON_ONCE(1);
-		return;
+		goto release;
 	}
 	*p++ = cpu_to_be32(op->opnum);
 	post_err_offset = xdr->buf->len;
@@ -5418,8 +5418,6 @@  nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	op->status = encoder(resp, op->status, &op->u);
 	if (op->status)
 		trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
-	if (opdesc && opdesc->op_release)
-		opdesc->op_release(&op->u);
 	xdr_commit_encode(xdr);
 
 	/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -5460,11 +5458,14 @@  nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	}
 status:
 	*p = op->status;
+release:
+	if (opdesc && opdesc->op_release)
+		opdesc->op_release(&op->u);
 }
 
-/* 
- * Encode the reply stored in the stateowner reply cache 
- * 
+/*
+ * Encode the reply stored in the stateowner reply cache
+ *
  * XDR note: do not encode rp->rp_buflen: the buffer contains the
  * previously sent already encoded operation.
  */