Message ID | 20221231150919.659533-8-agruenba@redhat.com
---|---
State | New, archived
Series | Turn iomap_page_ops into iomap_folio_ops
On Sat, Dec 31, 2022 at 04:09:17PM +0100, Andreas Gruenbacher wrote: > Eliminate the ->iomap_valid() handler by switching to a ->get_folio() > handler and validating the mapping there. > > Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> > --- > fs/iomap/buffered-io.c | 25 +++++-------------------- > fs/xfs/xfs_iomap.c | 37 ++++++++++++++++++++++++++----------- > include/linux/iomap.h | 22 +++++----------------- > 3 files changed, 36 insertions(+), 48 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 4f363d42dbaf..df6fca11f18c 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, > const struct iomap_page_ops *page_ops = iter->iomap.page_ops; > const struct iomap *srcmap = iomap_iter_srcmap(iter); > struct folio *folio; > - int status = 0; > + int status; > > BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); > if (srcmap != &iter->iomap) > @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, > folio = page_ops->get_folio(iter, pos, len); > else > folio = iomap_get_folio(iter, pos); > - if (IS_ERR(folio)) > - return PTR_ERR(folio); > - > - /* > - * Now we have a locked folio, before we do anything with it we need to > - * check that the iomap we have cached is not stale. The inode extent > - * mapping can change due to concurrent IO in flight (e.g. > - * IOMAP_UNWRITTEN state can change and memory reclaim could have > - * reclaimed a previously partially written page at this index after IO > - * completion before this write reaches this file offset) and hence we > - * could do the wrong thing here (zero a page range incorrectly or fail > - * to zero) and corrupt data. > - */ > - if (page_ops && page_ops->iomap_valid) { > - bool iomap_valid = page_ops->iomap_valid(iter->inode, > - &iter->iomap); > - if (!iomap_valid) { > + if (IS_ERR(folio)) { > + if (folio == ERR_PTR(-ESTALE)) { > iter->iomap.flags |= IOMAP_F_STALE; > - status = 0; > - goto out_unlock; > + return 0; > } > + return PTR_ERR(folio); I wonder if this should be reworked a bit to reduce indenting: if (PTR_ERR(folio) == -ESTALE) { iter->iomap.flags |= IOMAP_F_STALE; return 0; } if (IS_ERR(folio)) return PTR_ERR(folio); But I don't have any strong opinions about that. > } > > if (pos + len > folio_pos(folio) + folio_size(folio)) > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > index 669c1bc5c3a7..d0bf99539180 100644 > --- a/fs/xfs/xfs_iomap.c > +++ b/fs/xfs/xfs_iomap.c > @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence( > return cookie | READ_ONCE(ip->i_df.if_seq); > } > > -/* > - * Check that the iomap passed to us is still valid for the given offset and > - * length. > - */ > -static bool > -xfs_iomap_valid( > - struct inode *inode, > - const struct iomap *iomap) > +static struct folio * > +xfs_get_folio( > + struct iomap_iter *iter, > + loff_t pos, > + unsigned len) > { > + struct inode *inode = iter->inode; > + struct iomap *iomap = &iter->iomap; > struct xfs_inode *ip = XFS_I(inode); > + struct folio *folio; > > + folio = iomap_get_folio(iter, pos); > + if (IS_ERR(folio)) > + return folio; > + > + /* > + * Now that we have a locked folio, we need to check that the iomap we > + * have cached is not stale. 
The inode extent mapping can change due to > + * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and > + * memory reclaim could have reclaimed a previously partially written > + * page at this index after IO completion before this write reaches > + * this file offset) and hence we could do the wrong thing here (zero a > + * page range incorrectly or fail to zero) and corrupt data. > + */ > if (iomap->validity_cookie != > xfs_iomap_inode_sequence(ip, iomap->flags)) { > trace_xfs_iomap_invalid(ip, iomap); > - return false; > + folio_unlock(folio); > + folio_put(folio); > + return ERR_PTR(-ESTALE); > } > > XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS); > - return true; > + return folio; > } > > const struct iomap_page_ops xfs_iomap_page_ops = { > - .iomap_valid = xfs_iomap_valid, > + .get_folio = xfs_get_folio, > }; > > int > diff --git a/include/linux/iomap.h b/include/linux/iomap.h > index dd3575ada5d1..6f8e3321e475 100644 > --- a/include/linux/iomap.h > +++ b/include/linux/iomap.h > @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) > * When get_folio succeeds, put_folio will always be called to do any > * cleanup work necessary. put_folio is responsible for unlocking and putting > * @folio. > + * > + * When an iomap is created, the filesystem can store internal state (e.g., a > + * sequence number) in iomap->validity_cookie. When it then detects in the I would reword this to: "When an iomap is created, the filesystem can store internal state (e.g. sequence number) in iomap->validity_cookie. The get_folio handler can use this validity cookie to detect if the iomap is no longer up to date and needs to be refreshed. If this is the case, the function should return ERR_PTR(-ESTALE) to retry the operation with a fresh mapping." --D > + * get_folio handler that the iomap is no longer up to date and needs to be > + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry. > */ > struct iomap_page_ops { > struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, > unsigned len); > void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, > struct folio *folio); > - > - /* > - * Check that the cached iomap still maps correctly to the filesystem's > - * internal extent map. FS internal extent maps can change while iomap > - * is iterating a cached iomap, so this hook allows iomap to detect that > - * the iomap needs to be refreshed during a long running write > - * operation. > - * > - * The filesystem can store internal state (e.g. a sequence number) in > - * iomap->validity_cookie when the iomap is first mapped to be able to > - * detect changes between mapping time and whenever .iomap_valid() is > - * called. > - * > - * This is called with the folio over the specified file position held > - * locked by the iomap code. > - */ > - bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); > }; > > /* > -- > 2.38.1 >
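The behavioural core of the hunk Darrick quotes above is the retry contract: a ->get_folio handler that notices a stale mapping returns ERR_PTR(-ESTALE), iomap_write_begin() converts that into IOMAP_F_STALE plus a zero return, and the surrounding iteration can then retry the range with a fresh mapping instead of writing through a stale one. The toy program below models only that loop shape in plain userspace C; every name in it is invented (it is not the real iomap iterator), and TOY_F_STALE is an arbitrary stand-in for IOMAP_F_STALE.

------------------------------------
/*
 * Toy model of the stale-mapping retry contract described above, in plain
 * userspace C.  All names are invented; this is not the real iomap iterator.
 */
#include <stdio.h>

#define TOY_F_STALE 0x200u

struct toy_mapping {
	long long offset;	/* file offset this mapping covers */
	long long length;	/* bytes covered by the mapping */
	unsigned flags;
	unsigned cookie;	/* validity cookie captured at mapping time */
};

static unsigned extent_seq = 1;	/* bumped whenever the "extent map" changes */

static void toy_get_mapping(struct toy_mapping *m, long long pos)
{
	m->offset = pos;
	m->length = 4096;
	m->flags = 0;
	m->cookie = extent_seq;
}

/* One write step: bytes processed, or 0 with TOY_F_STALE set when stale. */
static long long toy_write_step(struct toy_mapping *m)
{
	if (m->cookie != extent_seq) {	/* what ->get_folio reports as -ESTALE */
		m->flags |= TOY_F_STALE;
		return 0;
	}
	return m->length;
}

int main(void)
{
	long long pos = 0, end = 3 * 4096;

	while (pos < end) {
		struct toy_mapping m;

		toy_get_mapping(&m, pos);
		if (pos == 4096 && extent_seq == 1)
			extent_seq++;	/* simulate a concurrent extent change */
		long long done = toy_write_step(&m);
		if (done == 0 && (m.flags & TOY_F_STALE)) {
			printf("stale mapping at %lld, refetching\n", pos);
			continue;	/* retry the same range with a fresh mapping */
		}
		pos += done;
	}
	printf("wrote up to %lld\n", pos);
	return 0;
}
------------------------------------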
Am Mi., 4. Jan. 2023 um 19:00 Uhr schrieb Darrick J. Wong <djwong@kernel.org>: > On Sat, Dec 31, 2022 at 04:09:17PM +0100, Andreas Gruenbacher wrote: > > Eliminate the ->iomap_valid() handler by switching to a ->get_folio() > > handler and validating the mapping there. > > > > Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> > > --- > > fs/iomap/buffered-io.c | 25 +++++-------------------- > > fs/xfs/xfs_iomap.c | 37 ++++++++++++++++++++++++++----------- > > include/linux/iomap.h | 22 +++++----------------- > > 3 files changed, 36 insertions(+), 48 deletions(-) > > > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > > index 4f363d42dbaf..df6fca11f18c 100644 > > --- a/fs/iomap/buffered-io.c > > +++ b/fs/iomap/buffered-io.c > > @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, > > const struct iomap_page_ops *page_ops = iter->iomap.page_ops; > > const struct iomap *srcmap = iomap_iter_srcmap(iter); > > struct folio *folio; > > - int status = 0; > > + int status; > > > > BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); > > if (srcmap != &iter->iomap) > > @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, > > folio = page_ops->get_folio(iter, pos, len); > > else > > folio = iomap_get_folio(iter, pos); > > - if (IS_ERR(folio)) > > - return PTR_ERR(folio); > > - > > - /* > > - * Now we have a locked folio, before we do anything with it we need to > > - * check that the iomap we have cached is not stale. The inode extent > > - * mapping can change due to concurrent IO in flight (e.g. > > - * IOMAP_UNWRITTEN state can change and memory reclaim could have > > - * reclaimed a previously partially written page at this index after IO > > - * completion before this write reaches this file offset) and hence we > > - * could do the wrong thing here (zero a page range incorrectly or fail > > - * to zero) and corrupt data. > > - */ > > - if (page_ops && page_ops->iomap_valid) { > > - bool iomap_valid = page_ops->iomap_valid(iter->inode, > > - &iter->iomap); > > - if (!iomap_valid) { > > + if (IS_ERR(folio)) { > > + if (folio == ERR_PTR(-ESTALE)) { > > iter->iomap.flags |= IOMAP_F_STALE; > > - status = 0; > > - goto out_unlock; > > + return 0; > > } > > + return PTR_ERR(folio); > > I wonder if this should be reworked a bit to reduce indenting: > > if (PTR_ERR(folio) == -ESTALE) { > iter->iomap.flags |= IOMAP_F_STALE; > return 0; > } > if (IS_ERR(folio)) > return PTR_ERR(folio); > > But I don't have any strong opinions about that. This is a relatively hot path; the compiler would have to convert the flattened version back to the nested version for the best possible result to avoid a redundant check. Not sure it would always do that. > > } > > > > if (pos + len > folio_pos(folio) + folio_size(folio)) > > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c > > index 669c1bc5c3a7..d0bf99539180 100644 > > --- a/fs/xfs/xfs_iomap.c > > +++ b/fs/xfs/xfs_iomap.c > > @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence( > > return cookie | READ_ONCE(ip->i_df.if_seq); > > } > > > > -/* > > - * Check that the iomap passed to us is still valid for the given offset and > > - * length. 
> > - */ > > -static bool > > -xfs_iomap_valid( > > - struct inode *inode, > > - const struct iomap *iomap) > > +static struct folio * > > +xfs_get_folio( > > + struct iomap_iter *iter, > > + loff_t pos, > > + unsigned len) > > { > > + struct inode *inode = iter->inode; > > + struct iomap *iomap = &iter->iomap; > > struct xfs_inode *ip = XFS_I(inode); > > + struct folio *folio; > > > > + folio = iomap_get_folio(iter, pos); > > + if (IS_ERR(folio)) > > + return folio; > > + > > + /* > > + * Now that we have a locked folio, we need to check that the iomap we > > + * have cached is not stale. The inode extent mapping can change due to > > + * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and > > + * memory reclaim could have reclaimed a previously partially written > > + * page at this index after IO completion before this write reaches > > + * this file offset) and hence we could do the wrong thing here (zero a > > + * page range incorrectly or fail to zero) and corrupt data. > > + */ > > if (iomap->validity_cookie != > > xfs_iomap_inode_sequence(ip, iomap->flags)) { > > trace_xfs_iomap_invalid(ip, iomap); > > - return false; > > + folio_unlock(folio); > > + folio_put(folio); > > + return ERR_PTR(-ESTALE); > > } > > > > XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS); > > - return true; > > + return folio; > > } > > > > const struct iomap_page_ops xfs_iomap_page_ops = { > > - .iomap_valid = xfs_iomap_valid, > > + .get_folio = xfs_get_folio, > > }; > > > > int > > diff --git a/include/linux/iomap.h b/include/linux/iomap.h > > index dd3575ada5d1..6f8e3321e475 100644 > > --- a/include/linux/iomap.h > > +++ b/include/linux/iomap.h > > @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) > > * When get_folio succeeds, put_folio will always be called to do any > > * cleanup work necessary. put_folio is responsible for unlocking and putting > > * @folio. > > + * > > + * When an iomap is created, the filesystem can store internal state (e.g., a > > + * sequence number) in iomap->validity_cookie. When it then detects in the > > I would reword this to: > > "When an iomap is created, the filesystem can store internal state (e.g. > sequence number) in iomap->validity_cookie. The get_folio handler can > use this validity cookie to detect if the iomap is no longer up to date > and needs to be refreshed. If this is the case, the function should > return ERR_PTR(-ESTALE) to retry the operation with a fresh mapping." Yes, but "e.g." is always followed by a comma and it's "when", not "if" here. So how about: * When an iomap is created, the filesystem can store internal state (e.g., a * sequence number) in iomap->validity_cookie. The get_folio handler can use * this validity cookie to detect when the iomap needs to be refreshed because * it is no longer up to date. In that case, the function should return * ERR_PTR(-ESTALE) to retry the operation with a fresh mapping. I've incorporated all your feedback into this branch for now: https://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git/log/?h=iomap-race Thank you, Andreas > --D > > > + * get_folio handler that the iomap is no longer up to date and needs to be > > + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry. 
> > */ > > struct iomap_page_ops { > > struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, > > unsigned len); > > void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, > > struct folio *folio); > > - > > - /* > > - * Check that the cached iomap still maps correctly to the filesystem's > > - * internal extent map. FS internal extent maps can change while iomap > > - * is iterating a cached iomap, so this hook allows iomap to detect that > > - * the iomap needs to be refreshed during a long running write > > - * operation. > > - * > > - * The filesystem can store internal state (e.g. a sequence number) in > > - * iomap->validity_cookie when the iomap is first mapped to be able to > > - * detect changes between mapping time and whenever .iomap_valid() is > > - * called. > > - * > > - * This is called with the folio over the specified file position held > > - * locked by the iomap code. > > - */ > > - bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); > > }; > > > > /* > > -- > > 2.38.1 > >
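The validity_cookie wording Andreas settles on above describes the producer half of the contract: the filesystem captures some internal state in iomap->validity_cookie when it creates the mapping. The patch itself only shows the consumer half for XFS (xfs_iomap_inode_sequence() compared in ->get_folio), so here is a hedged sketch of the producer side for an imaginary filesystem "myfs": a per-inode sequence counter that is bumped whenever the extent map changes and sampled into validity_cookie in ->iomap_begin. The myfs names are hypothetical and the ->iomap_begin argument list is assumed to match the iomap_ops of this kernel generation; treat it as an illustration of the contract, not code from the series.

------------------------------------
/*
 * Hypothetical sketch (not part of the patch): how a filesystem might
 * populate iomap->validity_cookie when it builds a mapping.  "myfs" and its
 * extent_seq counter are invented.
 */
#include <linux/fs.h>
#include <linux/iomap.h>

struct myfs_inode {
	struct inode	vfs_inode;
	u32		extent_seq;	/* bumped under the extent lock on every map change */
};

static inline struct myfs_inode *MYFS_I(struct inode *inode)
{
	return container_of(inode, struct myfs_inode, vfs_inode);
}

extern const struct iomap_page_ops myfs_iomap_page_ops;	/* provides .get_folio */

static int myfs_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
{
	struct myfs_inode *mi = MYFS_I(inode);

	/* ... look up the extent covering pos and fill in iomap ... */

	/*
	 * Remember what the extent map looked like when this iomap was
	 * created, so that .get_folio can later tell whether it went stale.
	 */
	iomap->validity_cookie = READ_ONCE(mi->extent_seq);
	iomap->page_ops = &myfs_iomap_page_ops;
	return 0;
}
------------------------------------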
On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> I wonder if this should be reworked a bit to reduce indenting:
>
> 	if (PTR_ERR(folio) == -ESTALE) {

FYI this is a bad habit to be in. The compiler can optimise

	if (folio == ERR_PTR(-ESTALE))

better than it can optimise the other way around.
On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > I wonder if this should be reworked a bit to reduce indenting:
> >
> > 	if (PTR_ERR(folio) == -ESTALE) {
>
> FYI this is a bad habit to be in. The compiler can optimise
>
> 	if (folio == ERR_PTR(-ESTALE))
>
> better than it can optimise the other way around.

Yes. I think doing the recording that Darrick suggested combined
with this style would be best:

	if (folio == ERR_PTR(-ESTALE)) {
		iter->iomap.flags |= IOMAP_F_STALE;
		return 0;
	}
	if (IS_ERR(folio))
		return PTR_ERR(folio);
On Sun, Jan 8, 2023 at 6:32 PM Christoph Hellwig <hch@infradead.org> wrote: > On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote: > > On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote: > > > I wonder if this should be reworked a bit to reduce indenting: > > > > > > if (PTR_ERR(folio) == -ESTALE) { > > > > FYI this is a bad habit to be in. The compiler can optimise > > > > if (folio == ERR_PTR(-ESTALE)) > > > > better than it can optimise the other way around. > > Yes. I think doing the recording that Darrick suggested combined > with this style would be best: > > if (folio == ERR_PTR(-ESTALE)) { > iter->iomap.flags |= IOMAP_F_STALE; > return 0; > } > if (IS_ERR(folio)) > return PTR_ERR(folio); Again, I've implemented this as a nested if because the -ESTALE case should be pretty rare, and if we unnest, we end up with an additional check on the main code path. To be specific, the "before" code here on my current system is this: ------------------------------------ if (IS_ERR(folio)) { 22ad: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp 22b4: 0f 87 bf 03 00 00 ja 2679 <iomap_write_begin+0x499> return 0; } return PTR_ERR(folio); } [...] 2679: 89 e8 mov %ebp,%eax if (folio == ERR_PTR(-ESTALE)) { 267b: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp 267f: 0f 85 b7 fc ff ff jne 233c <iomap_write_begin+0x15c> iter->iomap.flags |= IOMAP_F_STALE; 2685: 66 81 4b 42 00 02 orw $0x200,0x42(%rbx) return 0; 268b: e9 aa fc ff ff jmp 233a <iomap_write_begin+0x15a> ------------------------------------ While the "after" code is this: ------------------------------------ if (folio == ERR_PTR(-ESTALE)) { 22ad: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp 22b1: 0f 84 bc 00 00 00 je 2373 <iomap_write_begin+0x193> iter->iomap.flags |= IOMAP_F_STALE; return 0; } if (IS_ERR(folio)) return PTR_ERR(folio); 22b7: 89 e8 mov %ebp,%eax if (IS_ERR(folio)) 22b9: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp 22c0: 0f 87 82 00 00 00 ja 2348 <iomap_write_begin+0x168> ------------------------------------ The compiler isn't smart enough to re-nest the ifs by recognizing that folio == ERR_PTR(-ESTALE) is a subset of IS_ERR(folio). So do you still insist on that un-nesting even though it produces worse code? Thanks, Andreas
On Sun, Jan 08, 2023 at 07:50:01PM +0100, Andreas Gruenbacher wrote: > On Sun, Jan 8, 2023 at 6:32 PM Christoph Hellwig <hch@infradead.org> wrote: > > On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote: > > > On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote: > > > > I wonder if this should be reworked a bit to reduce indenting: > > > > > > > > if (PTR_ERR(folio) == -ESTALE) { > > > > > > FYI this is a bad habit to be in. The compiler can optimise > > > > > > if (folio == ERR_PTR(-ESTALE)) > > > > > > better than it can optimise the other way around. > > > > Yes. I think doing the recording that Darrick suggested combined > > with this style would be best: > > > > if (folio == ERR_PTR(-ESTALE)) { > > iter->iomap.flags |= IOMAP_F_STALE; > > return 0; > > } > > if (IS_ERR(folio)) > > return PTR_ERR(folio); > > Again, I've implemented this as a nested if because the -ESTALE case > should be pretty rare, and if we unnest, we end up with an additional > check on the main code path. To be specific, the "before" code here on > my current system is this: > > ------------------------------------ > if (IS_ERR(folio)) { > 22ad: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp > 22b4: 0f 87 bf 03 00 00 ja 2679 <iomap_write_begin+0x499> > return 0; > } > return PTR_ERR(folio); > } > [...] > 2679: 89 e8 mov %ebp,%eax > if (folio == ERR_PTR(-ESTALE)) { > 267b: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp > 267f: 0f 85 b7 fc ff ff jne 233c <iomap_write_begin+0x15c> > iter->iomap.flags |= IOMAP_F_STALE; > 2685: 66 81 4b 42 00 02 orw $0x200,0x42(%rbx) > return 0; > 268b: e9 aa fc ff ff jmp 233a <iomap_write_begin+0x15a> > ------------------------------------ > > While the "after" code is this: > > ------------------------------------ > if (folio == ERR_PTR(-ESTALE)) { > 22ad: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp > 22b1: 0f 84 bc 00 00 00 je 2373 <iomap_write_begin+0x193> > iter->iomap.flags |= IOMAP_F_STALE; > return 0; > } > if (IS_ERR(folio)) > return PTR_ERR(folio); > 22b7: 89 e8 mov %ebp,%eax > if (IS_ERR(folio)) > 22b9: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp > 22c0: 0f 87 82 00 00 00 ja 2348 <iomap_write_begin+0x168> > ------------------------------------ > > The compiler isn't smart enough to re-nest the ifs by recognizing that > folio == ERR_PTR(-ESTALE) is a subset of IS_ERR(folio). > > So do you still insist on that un-nesting even though it produces worse code? Me? Not anymore. :) --D > Thanks, > Andreas >
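For anyone who wants to poke at this trade-off outside a kernel tree, the pair of functions below approximates the two shapes with simplified copies of the ERR_PTR()/PTR_ERR()/IS_ERR() helpers (the real ones live in include/linux/err.h); the success path just returns 1 in place of the rest of iomap_write_begin(). Compiling with gcc -O2 -S shows whether a given compiler manages to re-nest the flattened form; on the kernel build in the dumps above, it did not. For reference, the 0xffffffffffffff8c immediate in those dumps is -ESTALE (-116) viewed as a pointer, and 0x200 is IOMAP_F_STALE.

------------------------------------
/*
 * Self-contained approximation of the two shapes discussed in this thread,
 * for comparing generated code with "gcc -O2 -S" outside a kernel tree.
 * The helpers below are simplified stand-ins for include/linux/err.h; this
 * is an illustration, not the real iomap_write_begin().
 */
#include <errno.h>

#define MAX_ERRNO	4095
#define IOMAP_F_STALE	0x200u	/* same value as the "orw $0x200" above */

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* error "pointers" are just -MAX_ERRNO..-1 cast to a pointer */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Nested shape, as kept in the patch: one test on the non-error path. */
int write_begin_nested(void *folio, unsigned *iomap_flags)
{
	if (IS_ERR(folio)) {
		if (folio == ERR_PTR(-ESTALE)) {	/* cmp against -116, i.e. 0x...ff8c */
			*iomap_flags |= IOMAP_F_STALE;
			return 0;
		}
		return PTR_ERR(folio);
	}
	return 1;	/* stand-in for the rest of the write path */
}

/* Flattened shape from the review: two tests before the non-error path,
 * unless the compiler proves the first implies IS_ERR() and re-nests them. */
int write_begin_flat(void *folio, unsigned *iomap_flags)
{
	if (folio == ERR_PTR(-ESTALE)) {
		*iomap_flags |= IOMAP_F_STALE;
		return 0;
	}
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	return 1;	/* stand-in for the rest of the write path */
}
------------------------------------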
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 4f363d42dbaf..df6fca11f18c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, const struct iomap_page_ops *page_ops = iter->iomap.page_ops; const struct iomap *srcmap = iomap_iter_srcmap(iter); struct folio *folio; - int status = 0; + int status; BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length); if (srcmap != &iter->iomap) @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, folio = page_ops->get_folio(iter, pos, len); else folio = iomap_get_folio(iter, pos); - if (IS_ERR(folio)) - return PTR_ERR(folio); - - /* - * Now we have a locked folio, before we do anything with it we need to - * check that the iomap we have cached is not stale. The inode extent - * mapping can change due to concurrent IO in flight (e.g. - * IOMAP_UNWRITTEN state can change and memory reclaim could have - * reclaimed a previously partially written page at this index after IO - * completion before this write reaches this file offset) and hence we - * could do the wrong thing here (zero a page range incorrectly or fail - * to zero) and corrupt data. - */ - if (page_ops && page_ops->iomap_valid) { - bool iomap_valid = page_ops->iomap_valid(iter->inode, - &iter->iomap); - if (!iomap_valid) { + if (IS_ERR(folio)) { + if (folio == ERR_PTR(-ESTALE)) { iter->iomap.flags |= IOMAP_F_STALE; - status = 0; - goto out_unlock; + return 0; } + return PTR_ERR(folio); } if (pos + len > folio_pos(folio) + folio_size(folio)) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 669c1bc5c3a7..d0bf99539180 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence( return cookie | READ_ONCE(ip->i_df.if_seq); } -/* - * Check that the iomap passed to us is still valid for the given offset and - * length. - */ -static bool -xfs_iomap_valid( - struct inode *inode, - const struct iomap *iomap) +static struct folio * +xfs_get_folio( + struct iomap_iter *iter, + loff_t pos, + unsigned len) { + struct inode *inode = iter->inode; + struct iomap *iomap = &iter->iomap; struct xfs_inode *ip = XFS_I(inode); + struct folio *folio; + folio = iomap_get_folio(iter, pos); + if (IS_ERR(folio)) + return folio; + + /* + * Now that we have a locked folio, we need to check that the iomap we + * have cached is not stale. The inode extent mapping can change due to + * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and + * memory reclaim could have reclaimed a previously partially written + * page at this index after IO completion before this write reaches + * this file offset) and hence we could do the wrong thing here (zero a + * page range incorrectly or fail to zero) and corrupt data. 
+ */ if (iomap->validity_cookie != xfs_iomap_inode_sequence(ip, iomap->flags)) { trace_xfs_iomap_invalid(ip, iomap); - return false; + folio_unlock(folio); + folio_put(folio); + return ERR_PTR(-ESTALE); } XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS); - return true; + return folio; } const struct iomap_page_ops xfs_iomap_page_ops = { - .iomap_valid = xfs_iomap_valid, + .get_folio = xfs_get_folio, }; int diff --git a/include/linux/iomap.h b/include/linux/iomap.h index dd3575ada5d1..6f8e3321e475 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. + * + * When an iomap is created, the filesystem can store internal state (e.g., a + * sequence number) in iomap->validity_cookie. When it then detects in the + * get_folio handler that the iomap is no longer up to date and needs to be + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry. */ struct iomap_page_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); - - /* - * Check that the cached iomap still maps correctly to the filesystem's - * internal extent map. FS internal extent maps can change while iomap - * is iterating a cached iomap, so this hook allows iomap to detect that - * the iomap needs to be refreshed during a long running write - * operation. - * - * The filesystem can store internal state (e.g. a sequence number) in - * iomap->validity_cookie when the iomap is first mapped to be able to - * detect changes between mapping time and whenever .iomap_valid() is - * called. - * - * This is called with the folio over the specified file position held - * locked by the iomap code. - */ - bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); }; /*
Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
handler and validating the mapping there.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/iomap/buffered-io.c | 25 +++++--------------------
 fs/xfs/xfs_iomap.c     | 37 ++++++++++++++++++++++++++-----------
 include/linux/iomap.h  | 22 +++++-----------------
 3 files changed, 36 insertions(+), 48 deletions(-)
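Putting the thread together from a filesystem author's point of view: under the new contract a filesystem implements ->get_folio, grabs the folio through iomap_get_folio(), compares the validity cookie it stored at mapping time against its current state, and returns ERR_PTR(-ESTALE) after unlocking and putting the folio when the mapping has gone stale. The sketch below does that for the same imaginary "myfs" used in the earlier sketch; it is modeled on the xfs_get_folio() added by this patch and is not code from the series.

------------------------------------
/*
 * Hypothetical sketch, modeled on xfs_get_folio() from the patch above:
 * the .get_folio side of the validity_cookie contract for the imaginary
 * "myfs".  Not code from the series.
 */
#include <linux/err.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>

struct myfs_inode {
	struct inode	vfs_inode;
	u32		extent_seq;	/* bumped whenever the extent map changes */
};

static inline struct myfs_inode *MYFS_I(struct inode *inode)
{
	return container_of(inode, struct myfs_inode, vfs_inode);
}

static struct folio *
myfs_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
	struct myfs_inode *mi = MYFS_I(iter->inode);
	struct folio *folio;

	folio = iomap_get_folio(iter, pos);
	if (IS_ERR(folio))
		return folio;

	/*
	 * The folio is locked.  If the extent map changed after the iomap
	 * was created, drop the folio and ask iomap for a fresh mapping.
	 */
	if (iter->iomap.validity_cookie != READ_ONCE(mi->extent_seq)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-ESTALE);
	}
	return folio;
}

const struct iomap_page_ops myfs_iomap_page_ops = {
	.get_folio	= myfs_get_folio,
};
------------------------------------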