@@ -1400,12 +1400,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
start + len <= folio_start + folio_size(folio));
ret = btrfs_writepage_cow_fixup(folio);
- if (ret) {
+ if (ret == -EAGAIN) {
/* Fixup worker will requeue */
folio_redirty_for_writepage(bio_ctrl->wbc, folio);
folio_unlock(folio);
return 1;
}
+ if (ret < 0)
+ goto out;
for (cur = start; cur < start + len; cur += fs_info->sectorsize)
set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
@@ -1492,6 +1494,29 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
* The proper bitmap can only be initialized until writepage_delalloc().
*/
bio_ctrl->submit_bitmap = (unsigned long)-1;
+ /*
+ * If the page is dirty but without private set, it's marked dirty
+ * without informing the fs.
+ * Nowadays that is a bug, since the introduction of
+ * pin_user_pages_locked().
+ *
+ * So here we check if the page has private set to rule out such
+ * case.
+ * But we also have a long history of relying on the COW fixup,
+ * so here we only enable this check for experimental builds until
+ * we're sure it's safe.
+ */
+ if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL) && !folio_test_private(folio)) {
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ btrfs_err_rl(fs_info,
+ "root %lld ino %llu folio %llu is marked dirty without notifying the fs",
+ BTRFS_I(inode)->root->root_key.objectid,
+ btrfs_ino(BTRFS_I(inode)),
+ folio_pos(folio));
+ ret = -EUCLEAN;
+ goto done;
+ }
+
ret = set_folio_extent_mapped(folio);
if (ret < 0)
goto done;
@@ -2832,6 +2832,21 @@ int btrfs_writepage_cow_fixup(struct folio *folio)
if (folio_test_ordered(folio))
return 0;
+ /*
+ * For experimental build, we error out instead of EAGAIN.
+ *
+ * We should not hit such out-of-band dirty folios anymore.
+ */
+ if (IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL)) {
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ btrfs_err_rl(fs_info,
+ "root %lld ino %llu folio %llu is marked dirty without notifying the fs",
+ BTRFS_I(inode)->root->root_key.objectid,
+ btrfs_ino(BTRFS_I(inode)),
+ folio_pos(folio));
+ return -EUCLEAN;
+ }
+
/*
* folio_checked is set below when we create a fixup worker for this
* folio, don't try to create another one if we're already
An out-of-band folio means the folio is marked dirty but without notifying the filesystem. This used to be a problem related to get_user_page(), but with the introduction of pin_user_pages_locked(), we should no longer hit such case anymore. In btrfs, we have a long history of handling such out-of-band dirty folios by: - Mark extent io tree EXTENT_DELALLOC during btrfs_dirty_folio() So that any buffered write into the page cache will have EXTENT_DEALLOC flag set for the corresponding range in btrfs' extent io tree. - Marking the folio ordered during delalloc This is based on EXTENT_DELALLOC flag in the extent io tree. - During folio submission for writeback check the ordered flag If the folio has no ordered folio, it means it doesn't go through the initial btrfs_dirty_folio(), thus it's definitely an out-of-band one. If we got one, we go through COW fixup, which will re-dirty the folio with proper handling in another workqueue. Such workaround is a blockage for us to migrate to iomap (it requires extra flags to trace if a folio is dirtied by the fs or not) and I'd argue it's not data checksum safe, since the folio can be modified halfway. But with the introduction of pin_user_pages_locked() during v5.8 merge window, such out-of-band dirty folio such be treated as a bug. Ext4 has treated such case by warning and erroring out even before pin_user_pages_locked(). Here we take one step between ext4 and the COW fixup workaround, that we go the ext4 way (warning and error out) for experimental builds, so that we can test if the no more out-of-band dirty folios expectation is true. For non-experimental builds we still keep the existing COW fixup, but I hope in several releases we can get rid of the COW fixup completely, making it much easier to migrate to iomap. The new checks happen in two locations: - extent_writepage() If an out-of-band dirty folio is marked dirty, it may not even have page->private properly set. Reject it early here. - btrfs_writepage_cow_fix() Error out with -EUCLEAN instead if we're sure it's an out-of-band one, and make the caller to check errors other than -EAGAIN. Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/extent_io.c | 27 ++++++++++++++++++++++++++- fs/btrfs/inode.c | 15 +++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-)