diff mbox series

[v5,7/9] iomap/xfs: Eliminate the iomap_valid handler

Message ID 20221231150919.659533-8-agruenba@redhat.com (mailing list archive)
State New, archived
Headers show
Series Turn iomap_page_ops into iomap_folio_ops | expand

Commit Message

Andreas Gruenbacher Dec. 31, 2022, 3:09 p.m. UTC
Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
handler and validating the mapping there.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/iomap/buffered-io.c | 25 +++++--------------------
 fs/xfs/xfs_iomap.c     | 37 ++++++++++++++++++++++++++-----------
 include/linux/iomap.h  | 22 +++++-----------------
 3 files changed, 36 insertions(+), 48 deletions(-)

Comments

Darrick J. Wong Jan. 4, 2023, 5:53 p.m. UTC | #1
On Sat, Dec 31, 2022 at 04:09:17PM +0100, Andreas Gruenbacher wrote:
> Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
> handler and validating the mapping there.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> ---
>  fs/iomap/buffered-io.c | 25 +++++--------------------
>  fs/xfs/xfs_iomap.c     | 37 ++++++++++++++++++++++++++-----------
>  include/linux/iomap.h  | 22 +++++-----------------
>  3 files changed, 36 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 4f363d42dbaf..df6fca11f18c 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
>  	const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
>  	const struct iomap *srcmap = iomap_iter_srcmap(iter);
>  	struct folio *folio;
> -	int status = 0;
> +	int status;
>  
>  	BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
>  	if (srcmap != &iter->iomap)
> @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
>  		folio = page_ops->get_folio(iter, pos, len);
>  	else
>  		folio = iomap_get_folio(iter, pos);
> -	if (IS_ERR(folio))
> -		return PTR_ERR(folio);
> -
> -	/*
> -	 * Now we have a locked folio, before we do anything with it we need to
> -	 * check that the iomap we have cached is not stale. The inode extent
> -	 * mapping can change due to concurrent IO in flight (e.g.
> -	 * IOMAP_UNWRITTEN state can change and memory reclaim could have
> -	 * reclaimed a previously partially written page at this index after IO
> -	 * completion before this write reaches this file offset) and hence we
> -	 * could do the wrong thing here (zero a page range incorrectly or fail
> -	 * to zero) and corrupt data.
> -	 */
> -	if (page_ops && page_ops->iomap_valid) {
> -		bool iomap_valid = page_ops->iomap_valid(iter->inode,
> -							&iter->iomap);
> -		if (!iomap_valid) {
> +	if (IS_ERR(folio)) {
> +		if (folio == ERR_PTR(-ESTALE)) {
>  			iter->iomap.flags |= IOMAP_F_STALE;
> -			status = 0;
> -			goto out_unlock;
> +			return 0;
>  		}
> +		return PTR_ERR(folio);

I wonder if this should be reworked a bit to reduce indenting:

	if (PTR_ERR(folio) == -ESTALE) {
		iter->iomap.flags |= IOMAP_F_STALE;
		return 0;
	}
	if (IS_ERR(folio))
		return PTR_ERR(folio);

But I don't have any strong opinions about that.

>  	}
>  
>  	if (pos + len > folio_pos(folio) + folio_size(folio))
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 669c1bc5c3a7..d0bf99539180 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence(
>  	return cookie | READ_ONCE(ip->i_df.if_seq);
>  }
>  
> -/*
> - * Check that the iomap passed to us is still valid for the given offset and
> - * length.
> - */
> -static bool
> -xfs_iomap_valid(
> -	struct inode		*inode,
> -	const struct iomap	*iomap)
> +static struct folio *
> +xfs_get_folio(
> +	struct iomap_iter	*iter,
> +	loff_t			pos,
> +	unsigned		len)
>  {
> +	struct inode		*inode = iter->inode;
> +	struct iomap		*iomap = &iter->iomap;
>  	struct xfs_inode	*ip = XFS_I(inode);
> +	struct folio		*folio;
>  
> +	folio = iomap_get_folio(iter, pos);
> +	if (IS_ERR(folio))
> +		return folio;
> +
> +	/*
> +	 * Now that we have a locked folio, we need to check that the iomap we
> +	 * have cached is not stale.  The inode extent mapping can change due to
> +	 * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and
> +	 * memory reclaim could have reclaimed a previously partially written
> +	 * page at this index after IO completion before this write reaches
> +	 * this file offset) and hence we could do the wrong thing here (zero a
> +	 * page range incorrectly or fail to zero) and corrupt data.
> +	 */
>  	if (iomap->validity_cookie !=
>  			xfs_iomap_inode_sequence(ip, iomap->flags)) {
>  		trace_xfs_iomap_invalid(ip, iomap);
> -		return false;
> +		folio_unlock(folio);
> +		folio_put(folio);
> +		return ERR_PTR(-ESTALE);
>  	}
>  
>  	XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
> -	return true;
> +	return folio;
>  }
>  
>  const struct iomap_page_ops xfs_iomap_page_ops = {
> -	.iomap_valid		= xfs_iomap_valid,
> +	.get_folio		= xfs_get_folio,
>  };
>  
>  int
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index dd3575ada5d1..6f8e3321e475 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
>   * When get_folio succeeds, put_folio will always be called to do any
>   * cleanup work necessary.  put_folio is responsible for unlocking and putting
>   * @folio.
> + *
> + * When an iomap is created, the filesystem can store internal state (e.g., a
> + * sequence number) in iomap->validity_cookie.  When it then detects in the

I would reword this to:

"When an iomap is created, the filesystem can store internal state (e.g.
sequence number) in iomap->validity_cookie.  The get_folio handler can
use this validity cookie to detect if the iomap is no longer up to date
and needs to be refreshed.  If this is the case, the function should
return ERR_PTR(-ESTALE) to retry the operation with a fresh mapping."

--D

> + * get_folio handler that the iomap is no longer up to date and needs to be
> + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
>   */
>  struct iomap_page_ops {
>  	struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
>  			unsigned len);
>  	void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
>  			struct folio *folio);
> -
> -	/*
> -	 * Check that the cached iomap still maps correctly to the filesystem's
> -	 * internal extent map. FS internal extent maps can change while iomap
> -	 * is iterating a cached iomap, so this hook allows iomap to detect that
> -	 * the iomap needs to be refreshed during a long running write
> -	 * operation.
> -	 *
> -	 * The filesystem can store internal state (e.g. a sequence number) in
> -	 * iomap->validity_cookie when the iomap is first mapped to be able to
> -	 * detect changes between mapping time and whenever .iomap_valid() is
> -	 * called.
> -	 *
> -	 * This is called with the folio over the specified file position held
> -	 * locked by the iomap code.
> -	 */
> -	bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
>  };
>  
>  /*
> -- 
> 2.38.1
>
Andreas Grünbacher Jan. 4, 2023, 7:02 p.m. UTC | #2
Am Mi., 4. Jan. 2023 um 19:00 Uhr schrieb Darrick J. Wong <djwong@kernel.org>:
> On Sat, Dec 31, 2022 at 04:09:17PM +0100, Andreas Gruenbacher wrote:
> > Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
> > handler and validating the mapping there.
> >
> > Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
> > ---
> >  fs/iomap/buffered-io.c | 25 +++++--------------------
> >  fs/xfs/xfs_iomap.c     | 37 ++++++++++++++++++++++++++-----------
> >  include/linux/iomap.h  | 22 +++++-----------------
> >  3 files changed, 36 insertions(+), 48 deletions(-)
> >
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 4f363d42dbaf..df6fca11f18c 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> >       const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> >       const struct iomap *srcmap = iomap_iter_srcmap(iter);
> >       struct folio *folio;
> > -     int status = 0;
> > +     int status;
> >
> >       BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
> >       if (srcmap != &iter->iomap)
> > @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> >               folio = page_ops->get_folio(iter, pos, len);
> >       else
> >               folio = iomap_get_folio(iter, pos);
> > -     if (IS_ERR(folio))
> > -             return PTR_ERR(folio);
> > -
> > -     /*
> > -      * Now we have a locked folio, before we do anything with it we need to
> > -      * check that the iomap we have cached is not stale. The inode extent
> > -      * mapping can change due to concurrent IO in flight (e.g.
> > -      * IOMAP_UNWRITTEN state can change and memory reclaim could have
> > -      * reclaimed a previously partially written page at this index after IO
> > -      * completion before this write reaches this file offset) and hence we
> > -      * could do the wrong thing here (zero a page range incorrectly or fail
> > -      * to zero) and corrupt data.
> > -      */
> > -     if (page_ops && page_ops->iomap_valid) {
> > -             bool iomap_valid = page_ops->iomap_valid(iter->inode,
> > -                                                     &iter->iomap);
> > -             if (!iomap_valid) {
> > +     if (IS_ERR(folio)) {
> > +             if (folio == ERR_PTR(-ESTALE)) {
> >                       iter->iomap.flags |= IOMAP_F_STALE;
> > -                     status = 0;
> > -                     goto out_unlock;
> > +                     return 0;
> >               }
> > +             return PTR_ERR(folio);
>
> I wonder if this should be reworked a bit to reduce indenting:
>
>         if (PTR_ERR(folio) == -ESTALE) {
>                 iter->iomap.flags |= IOMAP_F_STALE;
>                 return 0;
>         }
>         if (IS_ERR(folio))
>                 return PTR_ERR(folio);
>
> But I don't have any strong opinions about that.

This is a relatively hot path; the compiler would have to convert the
flattened version back to the nested version for the best possible
result to avoid a redundant check. Not sure it would always do that.

> >       }
> >
> >       if (pos + len > folio_pos(folio) + folio_size(folio))
> > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > index 669c1bc5c3a7..d0bf99539180 100644
> > --- a/fs/xfs/xfs_iomap.c
> > +++ b/fs/xfs/xfs_iomap.c
> > @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence(
> >       return cookie | READ_ONCE(ip->i_df.if_seq);
> >  }
> >
> > -/*
> > - * Check that the iomap passed to us is still valid for the given offset and
> > - * length.
> > - */
> > -static bool
> > -xfs_iomap_valid(
> > -     struct inode            *inode,
> > -     const struct iomap      *iomap)
> > +static struct folio *
> > +xfs_get_folio(
> > +     struct iomap_iter       *iter,
> > +     loff_t                  pos,
> > +     unsigned                len)
> >  {
> > +     struct inode            *inode = iter->inode;
> > +     struct iomap            *iomap = &iter->iomap;
> >       struct xfs_inode        *ip = XFS_I(inode);
> > +     struct folio            *folio;
> >
> > +     folio = iomap_get_folio(iter, pos);
> > +     if (IS_ERR(folio))
> > +             return folio;
> > +
> > +     /*
> > +      * Now that we have a locked folio, we need to check that the iomap we
> > +      * have cached is not stale.  The inode extent mapping can change due to
> > +      * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and
> > +      * memory reclaim could have reclaimed a previously partially written
> > +      * page at this index after IO completion before this write reaches
> > +      * this file offset) and hence we could do the wrong thing here (zero a
> > +      * page range incorrectly or fail to zero) and corrupt data.
> > +      */
> >       if (iomap->validity_cookie !=
> >                       xfs_iomap_inode_sequence(ip, iomap->flags)) {
> >               trace_xfs_iomap_invalid(ip, iomap);
> > -             return false;
> > +             folio_unlock(folio);
> > +             folio_put(folio);
> > +             return ERR_PTR(-ESTALE);
> >       }
> >
> >       XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
> > -     return true;
> > +     return folio;
> >  }
> >
> >  const struct iomap_page_ops xfs_iomap_page_ops = {
> > -     .iomap_valid            = xfs_iomap_valid,
> > +     .get_folio              = xfs_get_folio,
> >  };
> >
> >  int
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index dd3575ada5d1..6f8e3321e475 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> >   * When get_folio succeeds, put_folio will always be called to do any
> >   * cleanup work necessary.  put_folio is responsible for unlocking and putting
> >   * @folio.
> > + *
> > + * When an iomap is created, the filesystem can store internal state (e.g., a
> > + * sequence number) in iomap->validity_cookie.  When it then detects in the
>
> I would reword this to:
>
> "When an iomap is created, the filesystem can store internal state (e.g.
> sequence number) in iomap->validity_cookie.  The get_folio handler can
> use this validity cookie to detect if the iomap is no longer up to date
> and needs to be refreshed.  If this is the case, the function should
> return ERR_PTR(-ESTALE) to retry the operation with a fresh mapping."

Yes, but "e.g." is always followed by a comma and it's "when", not
"if" here. So how about:

 * When an iomap is created, the filesystem can store internal state (e.g., a
 * sequence number) in iomap->validity_cookie.  The get_folio handler can use
 * this validity cookie to detect when the iomap needs to be refreshed because
 * it is no longer up to date.  In that case, the function should return
 * ERR_PTR(-ESTALE) to retry the operation with a fresh mapping.

I've incorporated all your feedback into this branch for now:

https://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git/log/?h=iomap-race

Thank you,
Andreas


> --D
>
> > + * get_folio handler that the iomap is no longer up to date and needs to be
> > + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
> >   */
> >  struct iomap_page_ops {
> >       struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
> >                       unsigned len);
> >       void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> >                       struct folio *folio);
> > -
> > -     /*
> > -      * Check that the cached iomap still maps correctly to the filesystem's
> > -      * internal extent map. FS internal extent maps can change while iomap
> > -      * is iterating a cached iomap, so this hook allows iomap to detect that
> > -      * the iomap needs to be refreshed during a long running write
> > -      * operation.
> > -      *
> > -      * The filesystem can store internal state (e.g. a sequence number) in
> > -      * iomap->validity_cookie when the iomap is first mapped to be able to
> > -      * detect changes between mapping time and whenever .iomap_valid() is
> > -      * called.
> > -      *
> > -      * This is called with the folio over the specified file position held
> > -      * locked by the iomap code.
> > -      */
> > -     bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
> >  };
> >
> >  /*
> > --
> > 2.38.1
> >
Matthew Wilcox Jan. 4, 2023, 7:08 p.m. UTC | #3
On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> I wonder if this should be reworked a bit to reduce indenting:
> 
> 	if (PTR_ERR(folio) == -ESTALE) {

FYI this is a bad habit to be in.  The compiler can optimise

	if (folio == ERR_PTR(-ESTALE))

better than it can optimise the other way around.
Christoph Hellwig Jan. 8, 2023, 5:32 p.m. UTC | #4
On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > I wonder if this should be reworked a bit to reduce indenting:
> > 
> > 	if (PTR_ERR(folio) == -ESTALE) {
> 
> FYI this is a bad habit to be in.  The compiler can optimise
> 
> 	if (folio == ERR_PTR(-ESTALE))
> 
> better than it can optimise the other way around.

Yes.  I think doing the recording that Darrick suggested combined
with this style would be best:

	if (folio == ERR_PTR(-ESTALE)) {
		iter->iomap.flags |= IOMAP_F_STALE;
		return 0;
	}
	if (IS_ERR(folio))
		return PTR_ERR(folio);
Andreas Gruenbacher Jan. 8, 2023, 6:50 p.m. UTC | #5
On Sun, Jan 8, 2023 at 6:32 PM Christoph Hellwig <hch@infradead.org> wrote:
> On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> > On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > > I wonder if this should be reworked a bit to reduce indenting:
> > >
> > >     if (PTR_ERR(folio) == -ESTALE) {
> >
> > FYI this is a bad habit to be in.  The compiler can optimise
> >
> >       if (folio == ERR_PTR(-ESTALE))
> >
> > better than it can optimise the other way around.
>
> Yes.  I think doing the recording that Darrick suggested combined
> with this style would be best:
>
>         if (folio == ERR_PTR(-ESTALE)) {
>                 iter->iomap.flags |= IOMAP_F_STALE;
>                 return 0;
>         }
>         if (IS_ERR(folio))
>                 return PTR_ERR(folio);

Again, I've implemented this as a nested if because the -ESTALE case
should be pretty rare, and if we unnest, we end up with an additional
check on the main code path. To be specific, the "before" code here on
my current system is this:

------------------------------------
        if (IS_ERR(folio)) {
    22ad:       48 81 fd 00 f0 ff ff    cmp    $0xfffffffffffff000,%rbp
    22b4:       0f 87 bf 03 00 00       ja     2679 <iomap_write_begin+0x499>
                        return 0;
                }
                return PTR_ERR(folio);
        }
[...]
    2679:       89 e8                   mov    %ebp,%eax
                if (folio == ERR_PTR(-ESTALE)) {
    267b:       48 83 fd 8c             cmp    $0xffffffffffffff8c,%rbp
    267f:       0f 85 b7 fc ff ff       jne    233c <iomap_write_begin+0x15c>
                        iter->iomap.flags |= IOMAP_F_STALE;
    2685:       66 81 4b 42 00 02       orw    $0x200,0x42(%rbx)
                        return 0;
    268b:       e9 aa fc ff ff          jmp    233a <iomap_write_begin+0x15a>
------------------------------------

While the "after" code is this:

------------------------------------
        if (folio == ERR_PTR(-ESTALE)) {
    22ad:       48 83 fd 8c             cmp    $0xffffffffffffff8c,%rbp
    22b1:       0f 84 bc 00 00 00       je     2373 <iomap_write_begin+0x193>
                iter->iomap.flags |= IOMAP_F_STALE;
                return 0;
        }
        if (IS_ERR(folio))
                return PTR_ERR(folio);
    22b7:       89 e8                   mov    %ebp,%eax
        if (IS_ERR(folio))
    22b9:       48 81 fd 00 f0 ff ff    cmp    $0xfffffffffffff000,%rbp
    22c0:       0f 87 82 00 00 00       ja     2348 <iomap_write_begin+0x168>
------------------------------------

The compiler isn't smart enough to re-nest the ifs by recognizing that
folio == ERR_PTR(-ESTALE) is a subset of IS_ERR(folio).

So do you still insist on that un-nesting even though it produces worse code?

Thanks,
Andreas
Darrick J. Wong Jan. 10, 2023, 9:56 p.m. UTC | #6
On Sun, Jan 08, 2023 at 07:50:01PM +0100, Andreas Gruenbacher wrote:
> On Sun, Jan 8, 2023 at 6:32 PM Christoph Hellwig <hch@infradead.org> wrote:
> > On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> > > On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > > > I wonder if this should be reworked a bit to reduce indenting:
> > > >
> > > >     if (PTR_ERR(folio) == -ESTALE) {
> > >
> > > FYI this is a bad habit to be in.  The compiler can optimise
> > >
> > >       if (folio == ERR_PTR(-ESTALE))
> > >
> > > better than it can optimise the other way around.
> >
> > Yes.  I think doing the recording that Darrick suggested combined
> > with this style would be best:
> >
> >         if (folio == ERR_PTR(-ESTALE)) {
> >                 iter->iomap.flags |= IOMAP_F_STALE;
> >                 return 0;
> >         }
> >         if (IS_ERR(folio))
> >                 return PTR_ERR(folio);
> 
> Again, I've implemented this as a nested if because the -ESTALE case
> should be pretty rare, and if we unnest, we end up with an additional
> check on the main code path. To be specific, the "before" code here on
> my current system is this:
> 
> ------------------------------------
>         if (IS_ERR(folio)) {
>     22ad:       48 81 fd 00 f0 ff ff    cmp    $0xfffffffffffff000,%rbp
>     22b4:       0f 87 bf 03 00 00       ja     2679 <iomap_write_begin+0x499>
>                         return 0;
>                 }
>                 return PTR_ERR(folio);
>         }
> [...]
>     2679:       89 e8                   mov    %ebp,%eax
>                 if (folio == ERR_PTR(-ESTALE)) {
>     267b:       48 83 fd 8c             cmp    $0xffffffffffffff8c,%rbp
>     267f:       0f 85 b7 fc ff ff       jne    233c <iomap_write_begin+0x15c>
>                         iter->iomap.flags |= IOMAP_F_STALE;
>     2685:       66 81 4b 42 00 02       orw    $0x200,0x42(%rbx)
>                         return 0;
>     268b:       e9 aa fc ff ff          jmp    233a <iomap_write_begin+0x15a>
> ------------------------------------
> 
> While the "after" code is this:
> 
> ------------------------------------
>         if (folio == ERR_PTR(-ESTALE)) {
>     22ad:       48 83 fd 8c             cmp    $0xffffffffffffff8c,%rbp
>     22b1:       0f 84 bc 00 00 00       je     2373 <iomap_write_begin+0x193>
>                 iter->iomap.flags |= IOMAP_F_STALE;
>                 return 0;
>         }
>         if (IS_ERR(folio))
>                 return PTR_ERR(folio);
>     22b7:       89 e8                   mov    %ebp,%eax
>         if (IS_ERR(folio))
>     22b9:       48 81 fd 00 f0 ff ff    cmp    $0xfffffffffffff000,%rbp
>     22c0:       0f 87 82 00 00 00       ja     2348 <iomap_write_begin+0x168>
> ------------------------------------
> 
> The compiler isn't smart enough to re-nest the ifs by recognizing that
> folio == ERR_PTR(-ESTALE) is a subset of IS_ERR(folio).
> 
> So do you still insist on that un-nesting even though it produces worse code?

Me?  Not anymore. :)

--D

> Thanks,
> Andreas
>
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 4f363d42dbaf..df6fca11f18c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -630,7 +630,7 @@  static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 	const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	struct folio *folio;
-	int status = 0;
+	int status;
 
 	BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
 	if (srcmap != &iter->iomap)
@@ -646,27 +646,12 @@  static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 		folio = page_ops->get_folio(iter, pos, len);
 	else
 		folio = iomap_get_folio(iter, pos);
-	if (IS_ERR(folio))
-		return PTR_ERR(folio);
-
-	/*
-	 * Now we have a locked folio, before we do anything with it we need to
-	 * check that the iomap we have cached is not stale. The inode extent
-	 * mapping can change due to concurrent IO in flight (e.g.
-	 * IOMAP_UNWRITTEN state can change and memory reclaim could have
-	 * reclaimed a previously partially written page at this index after IO
-	 * completion before this write reaches this file offset) and hence we
-	 * could do the wrong thing here (zero a page range incorrectly or fail
-	 * to zero) and corrupt data.
-	 */
-	if (page_ops && page_ops->iomap_valid) {
-		bool iomap_valid = page_ops->iomap_valid(iter->inode,
-							&iter->iomap);
-		if (!iomap_valid) {
+	if (IS_ERR(folio)) {
+		if (folio == ERR_PTR(-ESTALE)) {
 			iter->iomap.flags |= IOMAP_F_STALE;
-			status = 0;
-			goto out_unlock;
+			return 0;
 		}
+		return PTR_ERR(folio);
 	}
 
 	if (pos + len > folio_pos(folio) + folio_size(folio))
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 669c1bc5c3a7..d0bf99539180 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -62,29 +62,44 @@  xfs_iomap_inode_sequence(
 	return cookie | READ_ONCE(ip->i_df.if_seq);
 }
 
-/*
- * Check that the iomap passed to us is still valid for the given offset and
- * length.
- */
-static bool
-xfs_iomap_valid(
-	struct inode		*inode,
-	const struct iomap	*iomap)
+static struct folio *
+xfs_get_folio(
+	struct iomap_iter	*iter,
+	loff_t			pos,
+	unsigned		len)
 {
+	struct inode		*inode = iter->inode;
+	struct iomap		*iomap = &iter->iomap;
 	struct xfs_inode	*ip = XFS_I(inode);
+	struct folio		*folio;
 
+	folio = iomap_get_folio(iter, pos);
+	if (IS_ERR(folio))
+		return folio;
+
+	/*
+	 * Now that we have a locked folio, we need to check that the iomap we
+	 * have cached is not stale.  The inode extent mapping can change due to
+	 * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and
+	 * memory reclaim could have reclaimed a previously partially written
+	 * page at this index after IO completion before this write reaches
+	 * this file offset) and hence we could do the wrong thing here (zero a
+	 * page range incorrectly or fail to zero) and corrupt data.
+	 */
 	if (iomap->validity_cookie !=
 			xfs_iomap_inode_sequence(ip, iomap->flags)) {
 		trace_xfs_iomap_invalid(ip, iomap);
-		return false;
+		folio_unlock(folio);
+		folio_put(folio);
+		return ERR_PTR(-ESTALE);
 	}
 
 	XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
-	return true;
+	return folio;
 }
 
 const struct iomap_page_ops xfs_iomap_page_ops = {
-	.iomap_valid		= xfs_iomap_valid,
+	.get_folio		= xfs_get_folio,
 };
 
 int
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index dd3575ada5d1..6f8e3321e475 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -134,29 +134,17 @@  static inline bool iomap_inline_data_valid(const struct iomap *iomap)
  * When get_folio succeeds, put_folio will always be called to do any
  * cleanup work necessary.  put_folio is responsible for unlocking and putting
  * @folio.
+ *
+ * When an iomap is created, the filesystem can store internal state (e.g., a
+ * sequence number) in iomap->validity_cookie.  When it then detects in the
+ * get_folio handler that the iomap is no longer up to date and needs to be
+ * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
  */
 struct iomap_page_ops {
 	struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
 			unsigned len);
 	void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
 			struct folio *folio);
-
-	/*
-	 * Check that the cached iomap still maps correctly to the filesystem's
-	 * internal extent map. FS internal extent maps can change while iomap
-	 * is iterating a cached iomap, so this hook allows iomap to detect that
-	 * the iomap needs to be refreshed during a long running write
-	 * operation.
-	 *
-	 * The filesystem can store internal state (e.g. a sequence number) in
-	 * iomap->validity_cookie when the iomap is first mapped to be able to
-	 * detect changes between mapping time and whenever .iomap_valid() is
-	 * called.
-	 *
-	 * This is called with the folio over the specified file position held
-	 * locked by the iomap code.
-	 */
-	bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
 };
 
 /*