diff mbox series

[v5,4/5] NFSD: Return both a hole and a data segment

Message ID 20200908162559.509113-5-Anna.Schumaker@Netapp.com (mailing list archive)
State New, archived
Headers show
Series NFSD: Add support for the v4.2 READ_PLUS operation | expand

Commit Message

Anna Schumaker Sept. 8, 2020, 4:25 p.m. UTC
From: Anna Schumaker <Anna.Schumaker@Netapp.com>

But only one of each right now. We'll expand on this in the next patch.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
v5: If we've already encoded a segment, then return a short read if
    later segments return an error for some reason.
---
 fs/nfsd/nfs4xdr.c | 54 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 15 deletions(-)

Comments

Bruce Fields Sept. 8, 2020, 7:49 p.m. UTC | #1
On Tue, Sep 08, 2020 at 12:25:58PM -0400, schumaker.anna@gmail.com wrote:
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> 
> But only one of each right now. We'll expand on this in the next patch.
> 
> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> ---
> v5: If we've already encoded a segment, then return a short read if
>     later segments return an error for some reason.
> ---
>  fs/nfsd/nfs4xdr.c | 54 ++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 39 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 45159bd9e9a4..856606263c1d 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -4603,7 +4603,7 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
>  static __be32
>  nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>  			    struct nfsd4_read *read,
> -			    unsigned long maxcount,  u32 *eof)
> +			    unsigned long *maxcount, u32 *eof)
>  {
>  	struct xdr_stream *xdr = &resp->xdr;
>  	struct file *file = read->rd_nf->nf_file;
> @@ -4614,19 +4614,19 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>  	__be64 tmp64;
>  
>  	if (hole_pos > read->rd_offset)
> -		maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset);
> +		*maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
>  
>  	/* Content type, offset, byte count */
>  	p = xdr_reserve_space(xdr, 4 + 8 + 4);
>  	if (!p)
>  		return nfserr_resource;
>  
> -	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> +	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
>  	if (read->rd_vlen < 0)
>  		return nfserr_resource;
>  
>  	nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> -			    resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
> +			    resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
>  	if (nfserr)
>  		return nfserr;
>  
> @@ -4634,7 +4634,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>  	write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
>  	tmp64 = cpu_to_be64(read->rd_offset);
>  	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
> -	tmp = htonl(maxcount);
> +	tmp = htonl(*maxcount);
>  	write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
>  	return nfs_ok;
>  }
> @@ -4642,11 +4642,19 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>  static __be32
>  nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
>  			    struct nfsd4_read *read,
> -			    unsigned long maxcount, u32 *eof)
> +			    unsigned long *maxcount, u32 *eof)
>  {
>  	struct file *file = read->rd_nf->nf_file;
> +	loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> +	unsigned long count;
>  	__be32 *p;
>  
> +	if (data_pos == -ENXIO)
> +		data_pos = i_size_read(file_inode(file));
> +	else if (data_pos <= read->rd_offset)
> +		return nfserr_resource;

I think there's still a race here:

	vfs_llseek(.,0,SEEK_HOLE) returns 1024
	read 1024 bytes of data
					another process fills the hole
	vfs_llseek(.,1024,SEEK_DATA) returns 1024
	code above returns nfserr_resource

We end up returning an error to the client when we should have just
returned more data.

--b.

> +	count = data_pos - read->rd_offset;
> +
>  	/* Content type, offset, byte count */
>  	p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
>  	if (!p)
> @@ -4654,9 +4662,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
>  
>  	*p++ = htonl(NFS4_CONTENT_HOLE);
>  	 p   = xdr_encode_hyper(p, read->rd_offset);
> -	 p   = xdr_encode_hyper(p, maxcount);
> +	 p   = xdr_encode_hyper(p, count);
>  
> -	*eof = (read->rd_offset + maxcount) >= i_size_read(file_inode(file));
> +	*eof = (read->rd_offset + count) >= i_size_read(file_inode(file));
> +	*maxcount = min_t(unsigned long, count, *maxcount);
>  	return nfs_ok;
>  }
>  
> @@ -4664,7 +4673,7 @@ static __be32
>  nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
>  		       struct nfsd4_read *read)
>  {
> -	unsigned long maxcount;
> +	unsigned long maxcount, count;
>  	struct xdr_stream *xdr = &resp->xdr;
>  	struct file *file;
>  	int starting_len = xdr->buf->len;
> @@ -4687,6 +4696,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	maxcount = min_t(unsigned long, maxcount,
>  			 (xdr->buf->buflen - xdr->buf->len));
>  	maxcount = min_t(unsigned long, maxcount, read->rd_length);
> +	count    = maxcount;
>  
>  	eof = read->rd_offset >= i_size_read(file_inode(file));
>  	if (eof)
> @@ -4695,24 +4705,38 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
>  	if (pos == -ENXIO)
>  		pos = i_size_read(file_inode(file));
> +	else if (pos < 0)
> +		pos = read->rd_offset;
>  
> -	if (pos > read->rd_offset) {
> -		maxcount = pos - read->rd_offset;
> -		nfserr = nfsd4_encode_read_plus_hole(resp, read, maxcount, &eof);
> +	if (pos == read->rd_offset) {
> +		maxcount = count;
> +		nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof);
> +		if (nfserr)
> +			goto out;
> +		count -= maxcount;
> +		read->rd_offset += maxcount;
>  		segments++;
> -	} else {
> -		nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> +	}
> +
> +	if (count > 0 && !eof) {
> +		maxcount = count;
> +		nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> +		if (nfserr)
> +			goto out;
> +		count -= maxcount;
> +		read->rd_offset += maxcount;
>  		segments++;
>  	}
>  
>  out:
> -	if (nfserr)
> +	if (nfserr && segments == 0)
>  		xdr_truncate_encode(xdr, starting_len);
>  	else {
>  		tmp = htonl(eof);
>  		write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
>  		tmp = htonl(segments);
>  		write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> +		nfserr = nfs_ok;
>  	}
>  
>  	return nfserr;
> -- 
> 2.28.0
>
Anna Schumaker Sept. 9, 2020, 4:51 p.m. UTC | #2
On Tue, Sep 8, 2020 at 3:49 PM J. Bruce Fields <bfields@redhat.com> wrote:
>
> On Tue, Sep 08, 2020 at 12:25:58PM -0400, schumaker.anna@gmail.com wrote:
> > From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> >
> > But only one of each right now. We'll expand on this in the next patch.
> >
> > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> > ---
> > v5: If we've already encoded a segment, then return a short read if
> >     later segments return an error for some reason.
> > ---
> >  fs/nfsd/nfs4xdr.c | 54 ++++++++++++++++++++++++++++++++++-------------
> >  1 file changed, 39 insertions(+), 15 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 45159bd9e9a4..856606263c1d 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -4603,7 +4603,7 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> >  static __be32
> >  nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> >                           struct nfsd4_read *read,
> > -                         unsigned long maxcount,  u32 *eof)
> > +                         unsigned long *maxcount, u32 *eof)
> >  {
> >       struct xdr_stream *xdr = &resp->xdr;
> >       struct file *file = read->rd_nf->nf_file;
> > @@ -4614,19 +4614,19 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> >       __be64 tmp64;
> >
> >       if (hole_pos > read->rd_offset)
> > -             maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset);
> > +             *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
> >
> >       /* Content type, offset, byte count */
> >       p = xdr_reserve_space(xdr, 4 + 8 + 4);
> >       if (!p)
> >               return nfserr_resource;
> >
> > -     read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> > +     read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
> >       if (read->rd_vlen < 0)
> >               return nfserr_resource;
> >
> >       nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> > -                         resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
> > +                         resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
> >       if (nfserr)
> >               return nfserr;
> >
> > @@ -4634,7 +4634,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> >       write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
> >       tmp64 = cpu_to_be64(read->rd_offset);
> >       write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
> > -     tmp = htonl(maxcount);
> > +     tmp = htonl(*maxcount);
> >       write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
> >       return nfs_ok;
> >  }
> > @@ -4642,11 +4642,19 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> >  static __be32
> >  nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> >                           struct nfsd4_read *read,
> > -                         unsigned long maxcount, u32 *eof)
> > +                         unsigned long *maxcount, u32 *eof)
> >  {
> >       struct file *file = read->rd_nf->nf_file;
> > +     loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> > +     unsigned long count;
> >       __be32 *p;
> >
> > +     if (data_pos == -ENXIO)
> > +             data_pos = i_size_read(file_inode(file));
> > +     else if (data_pos <= read->rd_offset)
> > +             return nfserr_resource;
>
> I think there's still a race here:
>
>         vfs_llseek(.,0,SEEK_HOLE) returns 1024
>         read 1024 bytes of data
>                                         another process fills the hole
>         vfs_llseek(.,1024,SEEK_DATA) returns 1024
>         code above returns nfserr_resource
>
> We end up returning an error to the client when we should have just
> returned more data.

As long as we've encoded at least one segment successfully, we'll
actually return a short read in this case (as of the most recent
patches). I tried implementing a check for what each segment actually
was before encoding, but it led to a lot of extra lseeks (so
potential for races / performance problems on btrfs). Returning a
short read seemed like a better approach to me.

Anna

>
> --b.
>
> > +     count = data_pos - read->rd_offset;
> > +
> >       /* Content type, offset, byte count */
> >       p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
> >       if (!p)
> > @@ -4654,9 +4662,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> >
> >       *p++ = htonl(NFS4_CONTENT_HOLE);
> >        p   = xdr_encode_hyper(p, read->rd_offset);
> > -      p   = xdr_encode_hyper(p, maxcount);
> > +      p   = xdr_encode_hyper(p, count);
> >
> > -     *eof = (read->rd_offset + maxcount) >= i_size_read(file_inode(file));
> > +     *eof = (read->rd_offset + count) >= i_size_read(file_inode(file));
> > +     *maxcount = min_t(unsigned long, count, *maxcount);
> >       return nfs_ok;
> >  }
> >
> > @@ -4664,7 +4673,7 @@ static __be32
> >  nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> >                      struct nfsd4_read *read)
> >  {
> > -     unsigned long maxcount;
> > +     unsigned long maxcount, count;
> >       struct xdr_stream *xdr = &resp->xdr;
> >       struct file *file;
> >       int starting_len = xdr->buf->len;
> > @@ -4687,6 +4696,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> >       maxcount = min_t(unsigned long, maxcount,
> >                        (xdr->buf->buflen - xdr->buf->len));
> >       maxcount = min_t(unsigned long, maxcount, read->rd_length);
> > +     count    = maxcount;
> >
> >       eof = read->rd_offset >= i_size_read(file_inode(file));
> >       if (eof)
> > @@ -4695,24 +4705,38 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> >       pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> >       if (pos == -ENXIO)
> >               pos = i_size_read(file_inode(file));
> > +     else if (pos < 0)
> > +             pos = read->rd_offset;
> >
> > -     if (pos > read->rd_offset) {
> > -             maxcount = pos - read->rd_offset;
> > -             nfserr = nfsd4_encode_read_plus_hole(resp, read, maxcount, &eof);
> > +     if (pos == read->rd_offset) {
> > +             maxcount = count;
> > +             nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof);
> > +             if (nfserr)
> > +                     goto out;
> > +             count -= maxcount;
> > +             read->rd_offset += maxcount;
> >               segments++;
> > -     } else {
> > -             nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> > +     }
> > +
> > +     if (count > 0 && !eof) {
> > +             maxcount = count;
> > +             nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> > +             if (nfserr)
> > +                     goto out;
> > +             count -= maxcount;
> > +             read->rd_offset += maxcount;
> >               segments++;
> >       }
> >
> >  out:
> > -     if (nfserr)
> > +     if (nfserr && segments == 0)
> >               xdr_truncate_encode(xdr, starting_len);
> >       else {
> >               tmp = htonl(eof);
> >               write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
> >               tmp = htonl(segments);
> >               write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > +             nfserr = nfs_ok;
> >       }
> >
> >       return nfserr;
> > --
> > 2.28.0
> >
>
J. Bruce Fields Sept. 9, 2020, 8:24 p.m. UTC | #3
On Wed, Sep 09, 2020 at 12:51:38PM -0400, Anna Schumaker wrote:
> On Tue, Sep 8, 2020 at 3:49 PM J. Bruce Fields <bfields@redhat.com> wrote:
> >
> > On Tue, Sep 08, 2020 at 12:25:58PM -0400, schumaker.anna@gmail.com wrote:
> > > From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> > >
> > > But only one of each right now. We'll expand on this in the next patch.
> > >
> > > Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> > > ---
> > > v5: If we've already encoded a segment, then return a short read if
> > >     later segments return an error for some reason.
> > > ---
> > >  fs/nfsd/nfs4xdr.c | 54 ++++++++++++++++++++++++++++++++++-------------
> > >  1 file changed, 39 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > index 45159bd9e9a4..856606263c1d 100644
> > > --- a/fs/nfsd/nfs4xdr.c
> > > +++ b/fs/nfsd/nfs4xdr.c
> > > @@ -4603,7 +4603,7 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> > >  static __be32
> > >  nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > >                           struct nfsd4_read *read,
> > > -                         unsigned long maxcount,  u32 *eof)
> > > +                         unsigned long *maxcount, u32 *eof)
> > >  {
> > >       struct xdr_stream *xdr = &resp->xdr;
> > >       struct file *file = read->rd_nf->nf_file;
> > > @@ -4614,19 +4614,19 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > >       __be64 tmp64;
> > >
> > >       if (hole_pos > read->rd_offset)
> > > -             maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset);
> > > +             *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
> > >
> > >       /* Content type, offset, byte count */
> > >       p = xdr_reserve_space(xdr, 4 + 8 + 4);
> > >       if (!p)
> > >               return nfserr_resource;
> > >
> > > -     read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> > > +     read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
> > >       if (read->rd_vlen < 0)
> > >               return nfserr_resource;
> > >
> > >       nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> > > -                         resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
> > > +                         resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
> > >       if (nfserr)
> > >               return nfserr;
> > >
> > > @@ -4634,7 +4634,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > >       write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
> > >       tmp64 = cpu_to_be64(read->rd_offset);
> > >       write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
> > > -     tmp = htonl(maxcount);
> > > +     tmp = htonl(*maxcount);
> > >       write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
> > >       return nfs_ok;
> > >  }
> > > @@ -4642,11 +4642,19 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > >  static __be32
> > >  nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> > >                           struct nfsd4_read *read,
> > > -                         unsigned long maxcount, u32 *eof)
> > > +                         unsigned long *maxcount, u32 *eof)
> > >  {
> > >       struct file *file = read->rd_nf->nf_file;
> > > +     loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> > > +     unsigned long count;
> > >       __be32 *p;
> > >
> > > +     if (data_pos == -ENXIO)
> > > +             data_pos = i_size_read(file_inode(file));
> > > +     else if (data_pos <= read->rd_offset)
> > > +             return nfserr_resource;
> >
> > I think there's still a race here:
> >
> >         vfs_llseek(.,0,SEEK_HOLE) returns 1024
> >         read 1024 bytes of data
> >                                         another process fills the hole
> >         vfs_llseek(.,1024,SEEK_DATA) returns 1024
> >         code above returns nfserr_resource
> >
> > We end up returning an error to the client when we should have just
> > returned more data.
> 
> As long as we've encoded at least one segment successfully, we'll
> actually return a short read in this case (as of the most recent
> patches). I tried implementing a check for what each segment actually
> was before encoding, but it led to a lot of extra lseeks (so
> potential for races / performance problems on btrfs). Returning a
> short read seemed like a better approach to me.

Argh, right, I forgot the "if (nfserr && segments == 0)" at the end of
nfsd4_encode_read_plus().

It's still possible to get a spurious error return if this happens at
the very start of the READ_PLUS, though.  Hm.  Might be better to just
encode another data segment?

--b.

> 
> Anna
> 
> >
> > --b.
> >
> > > +     count = data_pos - read->rd_offset;
> > > +
> > >       /* Content type, offset, byte count */
> > >       p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
> > >       if (!p)
> > > @@ -4654,9 +4662,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> > >
> > >       *p++ = htonl(NFS4_CONTENT_HOLE);
> > >        p   = xdr_encode_hyper(p, read->rd_offset);
> > > -      p   = xdr_encode_hyper(p, maxcount);
> > > +      p   = xdr_encode_hyper(p, count);
> > >
> > > -     *eof = (read->rd_offset + maxcount) >= i_size_read(file_inode(file));
> > > +     *eof = (read->rd_offset + count) >= i_size_read(file_inode(file));
> > > +     *maxcount = min_t(unsigned long, count, *maxcount);
> > >       return nfs_ok;
> > >  }
> > >
> > > @@ -4664,7 +4673,7 @@ static __be32
> > >  nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > >                      struct nfsd4_read *read)
> > >  {
> > > -     unsigned long maxcount;
> > > +     unsigned long maxcount, count;
> > >       struct xdr_stream *xdr = &resp->xdr;
> > >       struct file *file;
> > >       int starting_len = xdr->buf->len;
> > > @@ -4687,6 +4696,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > >       maxcount = min_t(unsigned long, maxcount,
> > >                        (xdr->buf->buflen - xdr->buf->len));
> > >       maxcount = min_t(unsigned long, maxcount, read->rd_length);
> > > +     count    = maxcount;
> > >
> > >       eof = read->rd_offset >= i_size_read(file_inode(file));
> > >       if (eof)
> > > @@ -4695,24 +4705,38 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > >       pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> > >       if (pos == -ENXIO)
> > >               pos = i_size_read(file_inode(file));
> > > +     else if (pos < 0)
> > > +             pos = read->rd_offset;
> > >
> > > -     if (pos > read->rd_offset) {
> > > -             maxcount = pos - read->rd_offset;
> > > -             nfserr = nfsd4_encode_read_plus_hole(resp, read, maxcount, &eof);
> > > +     if (pos == read->rd_offset) {
> > > +             maxcount = count;
> > > +             nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof);
> > > +             if (nfserr)
> > > +                     goto out;
> > > +             count -= maxcount;
> > > +             read->rd_offset += maxcount;
> > >               segments++;
> > > -     } else {
> > > -             nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> > > +     }
> > > +
> > > +     if (count > 0 && !eof) {
> > > +             maxcount = count;
> > > +             nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> > > +             if (nfserr)
> > > +                     goto out;
> > > +             count -= maxcount;
> > > +             read->rd_offset += maxcount;
> > >               segments++;
> > >       }
> > >
> > >  out:
> > > -     if (nfserr)
> > > +     if (nfserr && segments == 0)
> > >               xdr_truncate_encode(xdr, starting_len);
> > >       else {
> > >               tmp = htonl(eof);
> > >               write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
> > >               tmp = htonl(segments);
> > >               write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > > +             nfserr = nfs_ok;
> > >       }
> > >
> > >       return nfserr;
> > > --
> > > 2.28.0
> > >
> >
J. Bruce Fields Sept. 9, 2020, 8:44 p.m. UTC | #4
On Wed, Sep 09, 2020 at 04:24:00PM -0400, bfields wrote:
> On Wed, Sep 09, 2020 at 12:51:38PM -0400, Anna Schumaker wrote:
> > On Tue, Sep 8, 2020 at 3:49 PM J. Bruce Fields <bfields@redhat.com> wrote:
> > > I think there's still a race here:
> > >
> > >         vfs_llseek(.,0,SEEK_HOLE) returns 1024
> > >         read 1024 bytes of data
> > >                                         another process fills the hole
> > >         vfs_llseek(.,1024,SEEK_DATA) returns 1024
> > >         code above returns nfserr_resource
> > >
> > > We end up returning an error to the client when we should have just
> > > returned more data.
> > 
> > As long as we've encoded at least one segment successfully, we'll
> > actually return a short read in this case (as of the most recent
> > patches). I tried implementing a check for what each segment actually
> > was before encoding, but it led to a lot of extra lseeks (so
> > potential for races / performance problems on btrfs). Returning a
> > short read seemed like a better approach to me.
> 
> Argh, right, I forgot the "if (nfserr && segments == 0)" at the end of
> nfsd4_encode_read_plus().

(Oops, it's actually the "if (nfserr)" below that handles that case.)

> It's still possible to get a spurious error return if this happens at
> the very start of the READ_PLUS, though.  Hm.  Might be better to just
> encode another data segment?

So, I mean, if nfsd4_encode_read_plus_hole() doesn't find a hole after
all, just keep going, and create a data segment.

--b.

> 
> --b.
> 
> > 
> > Anna
> > 
> > >
> > > --b.
> > >
> > > > +     count = data_pos - read->rd_offset;
> > > > +
> > > >       /* Content type, offset, byte count */
> > > >       p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
> > > >       if (!p)
> > > > @@ -4654,9 +4662,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> > > >
> > > >       *p++ = htonl(NFS4_CONTENT_HOLE);
> > > >        p   = xdr_encode_hyper(p, read->rd_offset);
> > > > -      p   = xdr_encode_hyper(p, maxcount);
> > > > +      p   = xdr_encode_hyper(p, count);
> > > >
> > > > -     *eof = (read->rd_offset + maxcount) >= i_size_read(file_inode(file));
> > > > +     *eof = (read->rd_offset + count) >= i_size_read(file_inode(file));
> > > > +     *maxcount = min_t(unsigned long, count, *maxcount);
> > > >       return nfs_ok;
> > > >  }
> > > >
> > > > @@ -4664,7 +4673,7 @@ static __be32
> > > >  nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > >                      struct nfsd4_read *read)
> > > >  {
> > > > -     unsigned long maxcount;
> > > > +     unsigned long maxcount, count;
> > > >       struct xdr_stream *xdr = &resp->xdr;
> > > >       struct file *file;
> > > >       int starting_len = xdr->buf->len;
> > > > @@ -4687,6 +4696,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > >       maxcount = min_t(unsigned long, maxcount,
> > > >                        (xdr->buf->buflen - xdr->buf->len));
> > > >       maxcount = min_t(unsigned long, maxcount, read->rd_length);
> > > > +     count    = maxcount;
> > > >
> > > >       eof = read->rd_offset >= i_size_read(file_inode(file));
> > > >       if (eof)
> > > > @@ -4695,24 +4705,38 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > >       pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> > > >       if (pos == -ENXIO)
> > > >               pos = i_size_read(file_inode(file));
> > > > +     else if (pos < 0)
> > > > +             pos = read->rd_offset;
> > > >
> > > > -     if (pos > read->rd_offset) {
> > > > -             maxcount = pos - read->rd_offset;
> > > > -             nfserr = nfsd4_encode_read_plus_hole(resp, read, maxcount, &eof);
> > > > +     if (pos == read->rd_offset) {
> > > > +             maxcount = count;
> > > > +             nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof);
> > > > +             if (nfserr)
> > > > +                     goto out;
> > > > +             count -= maxcount;
> > > > +             read->rd_offset += maxcount;
> > > >               segments++;
> > > > -     } else {
> > > > -             nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> > > > +     }
> > > > +
> > > > +     if (count > 0 && !eof) {
> > > > +             maxcount = count;
> > > > +             nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> > > > +             if (nfserr)
> > > > +                     goto out;
> > > > +             count -= maxcount;
> > > > +             read->rd_offset += maxcount;
> > > >               segments++;
> > > >       }
> > > >
> > > >  out:
> > > > -     if (nfserr)
> > > > +     if (nfserr && segments == 0)
> > > >               xdr_truncate_encode(xdr, starting_len);
> > > >       else {
> > > >               tmp = htonl(eof);
> > > >               write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
> > > >               tmp = htonl(segments);
> > > >               write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > > > +             nfserr = nfs_ok;
> > > >       }
> > > >
> > > >       return nfserr;
> > > > --
> > > > 2.28.0
> > > >
> > >
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 45159bd9e9a4..856606263c1d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4603,7 +4603,7 @@  nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
 static __be32
 nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 			    struct nfsd4_read *read,
-			    unsigned long maxcount,  u32 *eof)
+			    unsigned long *maxcount, u32 *eof)
 {
 	struct xdr_stream *xdr = &resp->xdr;
 	struct file *file = read->rd_nf->nf_file;
@@ -4614,19 +4614,19 @@  nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 	__be64 tmp64;
 
 	if (hole_pos > read->rd_offset)
-		maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset);
+		*maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
 
 	/* Content type, offset, byte count */
 	p = xdr_reserve_space(xdr, 4 + 8 + 4);
 	if (!p)
 		return nfserr_resource;
 
-	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
+	read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
 	if (read->rd_vlen < 0)
 		return nfserr_resource;
 
 	nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
-			    resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
+			    resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
 	if (nfserr)
 		return nfserr;
 
@@ -4634,7 +4634,7 @@  nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 	write_bytes_to_xdr_buf(xdr->buf, starting_len,      &tmp,   4);
 	tmp64 = cpu_to_be64(read->rd_offset);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len + 4,  &tmp64, 8);
-	tmp = htonl(maxcount);
+	tmp = htonl(*maxcount);
 	write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp,   4);
 	return nfs_ok;
 }
@@ -4642,11 +4642,19 @@  nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
 static __be32
 nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
 			    struct nfsd4_read *read,
-			    unsigned long maxcount, u32 *eof)
+			    unsigned long *maxcount, u32 *eof)
 {
 	struct file *file = read->rd_nf->nf_file;
+	loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
+	unsigned long count;
 	__be32 *p;
 
+	if (data_pos == -ENXIO)
+		data_pos = i_size_read(file_inode(file));
+	else if (data_pos <= read->rd_offset)
+		return nfserr_resource;
+	count = data_pos - read->rd_offset;
+
 	/* Content type, offset, byte count */
 	p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
 	if (!p)
@@ -4654,9 +4662,10 @@  nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
 
 	*p++ = htonl(NFS4_CONTENT_HOLE);
 	 p   = xdr_encode_hyper(p, read->rd_offset);
-	 p   = xdr_encode_hyper(p, maxcount);
+	 p   = xdr_encode_hyper(p, count);
 
-	*eof = (read->rd_offset + maxcount) >= i_size_read(file_inode(file));
+	*eof = (read->rd_offset + count) >= i_size_read(file_inode(file));
+	*maxcount = min_t(unsigned long, count, *maxcount);
 	return nfs_ok;
 }
 
@@ -4664,7 +4673,7 @@  static __be32
 nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
 		       struct nfsd4_read *read)
 {
-	unsigned long maxcount;
+	unsigned long maxcount, count;
 	struct xdr_stream *xdr = &resp->xdr;
 	struct file *file;
 	int starting_len = xdr->buf->len;
@@ -4687,6 +4696,7 @@  nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
 	maxcount = min_t(unsigned long, maxcount,
 			 (xdr->buf->buflen - xdr->buf->len));
 	maxcount = min_t(unsigned long, maxcount, read->rd_length);
+	count    = maxcount;
 
 	eof = read->rd_offset >= i_size_read(file_inode(file));
 	if (eof)
@@ -4695,24 +4705,38 @@  nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
 	pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
 	if (pos == -ENXIO)
 		pos = i_size_read(file_inode(file));
+	else if (pos < 0)
+		pos = read->rd_offset;
 
-	if (pos > read->rd_offset) {
-		maxcount = pos - read->rd_offset;
-		nfserr = nfsd4_encode_read_plus_hole(resp, read, maxcount, &eof);
+	if (pos == read->rd_offset) {
+		maxcount = count;
+		nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof);
+		if (nfserr)
+			goto out;
+		count -= maxcount;
+		read->rd_offset += maxcount;
 		segments++;
-	} else {
-		nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
+	}
+
+	if (count > 0 && !eof) {
+		maxcount = count;
+		nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
+		if (nfserr)
+			goto out;
+		count -= maxcount;
+		read->rd_offset += maxcount;
 		segments++;
 	}
 
 out:
-	if (nfserr)
+	if (nfserr && segments == 0)
 		xdr_truncate_encode(xdr, starting_len);
 	else {
 		tmp = htonl(eof);
 		write_bytes_to_xdr_buf(xdr->buf, starting_len,     &tmp, 4);
 		tmp = htonl(segments);
 		write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+		nfserr = nfs_ok;
 	}
 
 	return nfserr;