@@ -575,7 +575,7 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
}
/* Do basic extent buffer check at read time */
-static int btrfs_check_extent_buffer(struct extent_buffer *eb)
+int btrfs_check_extent_buffer(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u16 csum_size;
@@ -661,6 +661,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
if (!page->private)
goto out;
+ if (page_to_fs_info(page)->sectorsize < PAGE_SIZE)
+ return btrfs_verify_subpage_extent_buffers(page, mirror);
+
eb = (struct extent_buffer *)page->private;
/*
@@ -129,6 +129,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int __init btrfs_end_io_wq_init(void);
void __cold btrfs_end_io_wq_exit(void);
+int btrfs_check_extent_buffer(struct extent_buffer *eb);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
@@ -46,6 +46,9 @@ struct subpage_eb_mapping {
*/
unsigned long bitmap;
+ /* Which range of ebs has been verified */
+ unsigned long verified;
+
/* We only support 64K PAGE_SIZE system to mount 4K sectorsize fs */
struct extent_buffer *buffers[SUBPAGE_NR_EXTENT_BUFFERS];
};
@@ -5017,6 +5020,7 @@ static void detach_subpage_mapping(struct extent_buffer *eb, struct page *page)
if (test_bit(i, &mapping->bitmap) &&
mapping->buffers[i] == eb) {
clear_bit(i, &mapping->bitmap);
+ clear_bit(i, &mapping->verified);
mapping->buffers[i] = NULL;
}
}
@@ -5696,6 +5700,38 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
}
+/*
+ * Verify a subpage extent buffer whose page was found already uptodate.
+ *
+ * For subpage, one btree page can already be uptodate (read by other tree
+ * blocks in the same page), but we haven't verified the csum of the tree
+ * block.
+ *
+ * So we need to do an extra check for the uptodate page of the extent buffer.
+ *
+ * When sectorsize == PAGE_SIZE nothing is done and 0 is returned.
+ * Otherwise the bit of @eb's first sector in the page's
+ * subpage_eb_mapping::verified bitmap is tested under private_lock. If the
+ * bit is clear, btrfs_check_extent_buffer() is called and the bit is set
+ * when that call returns 0.
+ *
+ * Return 0 if @eb was already marked verified or passes the check, otherwise
+ * the value returned by btrfs_check_extent_buffer().
+ *
+ * NOTE(review): btrfs_check_extent_buffer() and the final set_bit() run
+ * after private_lock has been dropped, and only the first sector's bit is
+ * set here while btrfs_verify_subpage_extent_buffers() sets
+ * nodesize / sectorsize bits per extent buffer -- confirm both are intended.
+ */
+static int check_uptodate_extent_buffer_page(struct extent_buffer *eb)
+{
+ struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct subpage_eb_mapping *eb_mapping;
+ struct page *page = eb->pages[0];
+ int nr_bit; /* bit index of eb's first sector within the page */
+ int ret;
+
+ if (fs_info->sectorsize == PAGE_SIZE)
+ return 0;
+
+ nr_bit = (eb->start - page_offset(page)) / fs_info->sectorsize;
+ spin_lock(&page->mapping->private_lock);
+ eb_mapping = (struct subpage_eb_mapping *)page->private;
+ if (test_bit(nr_bit, &eb_mapping->verified)) {
+ spin_unlock(&page->mapping->private_lock);
+ return 0;
+ }
+ spin_unlock(&page->mapping->private_lock);
+ ret = btrfs_check_extent_buffer(eb);
+ if (!ret)
+ set_bit(nr_bit, &eb_mapping->verified);
+ return ret;
+}
+
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
{
int i;
@@ -5737,7 +5773,9 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
}
if (all_uptodate) {
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ ret = check_uptodate_extent_buffer_page(eb);
+ if (!ret)
+ set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
goto unlock_exit;
}
@@ -6396,3 +6434,74 @@ int try_release_extent_buffer(struct page *page)
return release_extent_buffer(eb);
}
+
+/*
+ * Verify all referred extent buffers in one page for subpage support.
+ *
+ * This is called in btree_readpage_end_io_hook(), where we still have the
+ * page locked.
+ * Here we walk every extent buffer currently recorded in the bitmap of the
+ * page's subpage_eb_mapping, not only the one that triggered the page read.
+ * Extent buffers that are not recorded in the mapping are not covered.
+ *
+ * We still need to do the same check for read_extent_buffer_pages() where
+ * the page of the extent buffer is already uptodate.
+ *
+ * @page:   btree page whose page->private holds the subpage_eb_mapping
+ * @mirror: mirror number, stored into eb->read_mirror of each visited eb
+ *
+ * Return 0 if every recorded extent buffer passes, -EIO if one of them has
+ * EXTENT_BUFFER_READ_ERR set, or the negative value returned by
+ * btrfs_check_extent_buffer().  The walk stops at the first failure, and
+ * private_lock is not held on return.
+ */
+int btrfs_verify_subpage_extent_buffers(struct page *page, int mirror)
+{
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+ struct extent_buffer *eb;
+ struct subpage_eb_mapping *eb_mapping;
+ int nr_bits = (fs_info->nodesize / fs_info->sectorsize); /* bits per eb */
+ int i;
+ int ret = 0;
+
+ spin_lock(&page->mapping->private_lock);
+ eb_mapping = (struct subpage_eb_mapping *)page->private;
+ for (i = 0; i < SUBPAGE_NR_EXTENT_BUFFERS; i++) {
+ int reads_done;
+ int j;
+
+ if (!test_bit(i, &eb_mapping->bitmap))
+ continue;
+
+ eb = eb_mapping->buffers[i];
+ spin_unlock(&page->mapping->private_lock);
+
+ atomic_inc(&eb->refs);
+ reads_done = atomic_dec_and_test(&eb->io_pages);
+
+ /*
+ * For subpage tree block, all tree read should be contained in
+ * one page, thus the read should always be done.
+ *
+ * NOTE(review): the reference on the eb is taken only after
+ * private_lock was dropped, and io_pages is decremented for
+ * every eb recorded in the mapping -- confirm that an eb cannot
+ * be released in that window and that each recorded eb really
+ * had a pending io_pages count.
+ */
+ ASSERT(reads_done);
+
+ eb->read_mirror = mirror;
+ if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
+ ret = -EIO;
+ atomic_inc(&eb->io_pages); /* undo the decrement above */
+ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ free_extent_buffer(eb);
+ goto out;
+ }
+
+ ret = btrfs_check_extent_buffer(eb);
+ if (ret < 0) {
+ atomic_inc(&eb->io_pages); /* undo the decrement above */
+ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+ free_extent_buffer(eb);
+ goto out;
+ }
+ for (j = i; j < i + nr_bits; j++)
+ set_bit(j, &eb_mapping->verified);
+
+ /*
+ * Go to next eb directly: together with the loop's own i++ this
+ * advances i by nr_bits, skipping the bits just marked verified.
+ */
+ i += (nr_bits - 1);
+
+ free_extent_buffer(eb);
+ spin_lock(&page->mapping->private_lock);
+ }
+ spin_unlock(&page->mapping->private_lock);
+out:
+ return ret;
+}
@@ -330,6 +330,7 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
struct page *page, unsigned int pgoff,
u64 start, u64 end, int failed_mirror,
submit_bio_hook_t *submit_bio_hook);
+int btrfs_verify_subpage_extent_buffers(struct page *page, int mirror);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
Unlike regular PAGE_SIZE == sectorsize case, one btree inode page can contain several tree blocks. This makes the csum and other basic tree block verification very tricky, as in btree_readpage_end_io_hook(), we can only check the extent buffer who triggers this page read, not the remaining tree blocks in the same page. So this patch will change the timing of tree block verification to the following timings: - btree_readpage_end_io_hook() This is the old timing, but now we check all known extent buffers of the page. - read_extent_buffer_pages() This is the new timing exclusive for subpage support. Now if an extent buffer finds all its page (only 1 for subpage) is already uptodate, it still needs to check if we have already checked the extent buffer. If not, then call btrfs_check_extent_buffer() to verify the extent buffer. Signed-off-by: Qu Wenruo <wqu@suse.com> --- fs/btrfs/disk-io.c | 5 +- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent_io.c | 111 ++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/extent_io.h | 1 + 4 files changed, 116 insertions(+), 2 deletions(-)