
[29/32] btrfs: scrub: introduce scrub_page::page_len for subpage support

Message ID 20201103133108.148112-30-wqu@suse.com (mailing list archive)
State New, archived
Series [01/32] btrfs: extent_io: remove the extent_start/extent_len for end_bio_extent_readpage()

Commit Message

Qu Wenruo Nov. 3, 2020, 1:31 p.m. UTC
Currently scrub_page has only one csum for each page. This is fine when
page size == sector size, since each page then has exactly one csum.

But for subpage support, we can have cases where only part of the page
is utilized, e.g. one 4K sector read into a 64K page. In that case, we
need a way to determine which range is really utilized.

This patch introduces scrub_page::page_len so that we know where the
utilized range ends.

This is especially important for subpage, as a write bio could overwrite
existing data if we just submitted a full-page bio.
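
A minimal sketch of the intended usage, mirroring the hunks below
(illustrative only, not the exact call sites):

	/* Fill time: record how much of the page is really utilized. */
	spage->page_len = sectorsize;	/* e.g. 4K inside a 64K page */

	/* Submit time: add only the utilized range to the bio, so a write
	 * bio never touches data beyond the real sector.
	 */
	ret = bio_add_page(bio, spage->page, spage->page_len, 0);
	if (ret != spage->page_len)
		return -EIO;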

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/scrub.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

Comments

David Sterba Nov. 9, 2020, 6:17 p.m. UTC | #1
On Tue, Nov 03, 2020 at 09:31:05PM +0800, Qu Wenruo wrote:
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -72,9 +72,15 @@ struct scrub_page {
>  	u64			physical_for_dev_replace;
>  	atomic_t		refs;
>  	struct {
> -		unsigned int	mirror_num:8;
> -		unsigned int	have_csum:1;
> -		unsigned int	io_error:1;
> +		/*
> +		 * For subpage case, where only part of the page is utilized
> +		 * Note that 16 bits can only go 65535, not 65536, thus we have
> +		 * to use 17 bits here.
> +		 */
> +		u32	page_len:17;
> +		u32	mirror_num:8;
> +		u32	have_csum:1;
> +		u32	io_error:1;
>  	};

The embedded struct is a relic, so this can be cleaned up further.
mirror_num can become u8. The page length width is a bit awkward: 17 is
the lowest number of bits that can hold sizes up to 64K, but there's
still some space left, so it can go up to 22 bits without increasing the
structure size (see the small check after the pahole output below).

Source:

struct scrub_page {
        struct scrub_block      *sblock;
        struct page             *page;
        struct btrfs_device     *dev;
        struct list_head        list;
        u64                     flags;  /* extent flags */
        u64                     generation;
        u64                     logical;
        u64                     physical;
        u64                     physical_for_dev_replace;
        atomic_t                refs;
        u8                      mirror_num;
        /*
         * For the subpage case, only part of the page is utilized. Note that
         * 16 bits can only go up to 65535, not 65536, so at least 17 bits are
         * needed here.
         */
        u32     page_len:20;
        u32     have_csum:1;
        u32     io_error:1;
        u8                      csum[BTRFS_CSUM_SIZE];

        struct scrub_recover    *recover;
};

pahole:

struct scrub_page {
        struct scrub_block *       sblock;               /*     0     8 */
        struct page *              page;                 /*     8     8 */
        struct btrfs_device *      dev;                  /*    16     8 */
        struct list_head           list;                 /*    24    16 */
        u64                        flags;                /*    40     8 */
        u64                        generation;           /*    48     8 */
        u64                        logical;              /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        u64                        physical;             /*    64     8 */
        u64                        physical_for_dev_replace; /*    72     8 */
        atomic_t                   refs;                 /*    80     4 */
        u8                         mirror_num;           /*    84     1 */

        /* Bitfield combined with previous fields */

        u32                        page_len:20;          /*    84: 8  4 */
        u32                        have_csum:1;          /*    84:28  4 */
        u32                        io_error:1;           /*    84:29  4 */

        /* XXX 2 bits hole, try to pack */

        u8                         csum[32];             /*    88    32 */
        struct scrub_recover *     recover;              /*   120     8 */

        /* size: 128, cachelines: 2, members: 16 */
        /* sum members: 125 */
        /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 2 bits */
};
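
A quick userspace check of the arithmetic above (illustrative stand-in,
not the kernel struct; it relies on the usual GCC/Clang bitfield packing
that the pahole output shows):

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for the tail of scrub_page once mirror_num is a plain u8:
 * the bitfields share the same 32-bit word as mirror_num, so
 * 8 + 22 + 1 + 1 = 32 bits is the most that fits in a single u32.
 */
struct tail {
        uint8_t  mirror_num;
        uint32_t page_len:22;
        uint32_t have_csum:1;
        uint32_t io_error:1;
};

int main(void)
{
        /* 16 bits top out at 65535, so a full 64K (65536) length needs 17. */
        printf("16-bit max: %u\n", (1u << 16) - 1);                 /* 65535  */
        printf("17-bit max: %u\n", (1u << 17) - 1);                 /* 131071 */
        printf("sizeof(struct tail): %zu\n", sizeof(struct tail));  /* 4 */
        return 0;
}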
David Sterba Nov. 9, 2020, 6:25 p.m. UTC | #2
On Tue, Nov 03, 2020 at 09:31:05PM +0800, Qu Wenruo wrote:
> Currently scrub_page only has one csum for each page, this is fine if
> page size == sector size, then each page has one csum for it.
> 
> But for subpage support, we could have cases where only part of the page
> is utilized. E.g one 4K sector is read into a 64K page.
> In that case, we need a way to determine which range is really utilized.
> 
> This patch will introduce scrub_page::page_len so that we can know
> where the utilized range ends.

Actually, wouldn't this always be sectorsize or nodesize? I.e. is it
necessary to track the length inside scrub_page at all? It might make
sense for convenience though.
Qu Wenruo Nov. 10, 2020, 12:54 a.m. UTC | #3
On 2020/11/10 at 2:17 a.m., David Sterba wrote:
> On Tue, Nov 03, 2020 at 09:31:05PM +0800, Qu Wenruo wrote:
>> --- a/fs/btrfs/scrub.c
>> +++ b/fs/btrfs/scrub.c
>> @@ -72,9 +72,15 @@ struct scrub_page {
>>  	u64			physical_for_dev_replace;
>>  	atomic_t		refs;
>>  	struct {
>> -		unsigned int	mirror_num:8;
>> -		unsigned int	have_csum:1;
>> -		unsigned int	io_error:1;
>> +		/*
>> +		 * For subpage case, where only part of the page is utilized
>> +		 * Note that 16 bits can only go 65535, not 65536, thus we have
>> +		 * to use 17 bits here.
>> +		 */
>> +		u32	page_len:17;
>> +		u32	mirror_num:8;
>> +		u32	have_csum:1;
>> +		u32	io_error:1;
>>  	};
> 
> The embedded struct is some relic so this can be cleaned up further.
> Mirror_num can become u8. The page length size is a bit awkward, 17 is
> the lowest number to contain the size up to 64k but there's still some
> space left so it can go up to 22 without increasing the structure size.
> 
> Source:
> 
> struct scrub_page {
>         struct scrub_block      *sblock;
>         struct page             *page;
>         struct btrfs_device     *dev;
>         struct list_head        list;
>         u64                     flags;  /* extent flags */
>         u64                     generation;
>         u64                     logical;
>         u64                     physical;
>         u64                     physical_for_dev_replace;
>         atomic_t                refs;
>         u8                      mirror_num;
>         /*
>          * For subpage case, where only part of the page is utilized Note that
>          * 16 bits can only go 65535, not 65536, thus we have to use 17 bits
>          * here.
>          */
>         u32     page_len:20;
>         u32     have_csum:1;
>         u32     io_error:1;
>         u8                      csum[BTRFS_CSUM_SIZE];
> 
>         struct scrub_recover    *recover;
> };
> 
> pahole:
> 
> struct scrub_page {
>         struct scrub_block *       sblock;               /*     0     8 */
>         struct page *              page;                 /*     8     8 */
>         struct btrfs_device *      dev;                  /*    16     8 */
>         struct list_head           list;                 /*    24    16 */
>         u64                        flags;                /*    40     8 */
>         u64                        generation;           /*    48     8 */
>         u64                        logical;              /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         u64                        physical;             /*    64     8 */
>         u64                        physical_for_dev_replace; /*    72     8 */
>         atomic_t                   refs;                 /*    80     4 */
>         u8                         mirror_num;           /*    84     1 */
> 
>         /* Bitfield combined with previous fields */
> 
>         u32                        page_len:20;          /*    84: 8  4 */
>         u32                        have_csum:1;          /*    84:28  4 */
>         u32                        io_error:1;           /*    84:29  4 */
> 
>         /* XXX 2 bits hole, try to pack */
> 
>         u8                         csum[32];             /*    88    32 */
>         struct scrub_recover *     recover;              /*   120     8 */
> 
>         /* size: 128, cachelines: 2, members: 16 */
>         /* sum members: 125 */
>         /* sum bitfield members: 22 bits, bit holes: 1, sum bit holes: 2 bits */
> };
> 
Thanks, that indeed looks much better.

I will go in this direction.

Thanks,
Qu
Qu Wenruo Nov. 10, 2020, 12:56 a.m. UTC | #4
On 2020/11/10 at 2:25 a.m., David Sterba wrote:
> On Tue, Nov 03, 2020 at 09:31:05PM +0800, Qu Wenruo wrote:
>> Currently scrub_page only has one csum for each page, this is fine if
>> page size == sector size, then each page has one csum for it.
>>
>> But for subpage support, we could have cases where only part of the page
>> is utilized. E.g one 4K sector is read into a 64K page.
>> In that case, we need a way to determine which range is really utilized.
>>
>> This patch will introduce scrub_page::page_len so that we can know
>> where the utilized range ends.
> 
> Actually, this should be sectorsize or nodesize? Ie. is it necessary to
> track the length inside scrub_page at all? It might make sense for
> convenience though.
> 
In the end, there is no need to track page_len for the current
implementation; it's always the sector size.

But that conflicts with the name "scrub_page", which effectively makes
it a "scrub_sector".

Anyway, I need to update the scrub support patchset to follow the
one-sector-per-scrub_page policy.

Thanks,
Qu
David Sterba Nov. 10, 2020, 2:27 p.m. UTC | #5
On Tue, Nov 10, 2020 at 08:56:21AM +0800, Qu Wenruo wrote:
> 
> 
> On 2020/11/10 at 2:25 a.m., David Sterba wrote:
> > On Tue, Nov 03, 2020 at 09:31:05PM +0800, Qu Wenruo wrote:
> >> Currently scrub_page only has one csum for each page, this is fine if
> >> page size == sector size, then each page has one csum for it.
> >>
> >> But for subpage support, we could have cases where only part of the page
> >> is utilized. E.g one 4K sector is read into a 64K page.
> >> In that case, we need a way to determine which range is really utilized.
> >>
> >> This patch will introduce scrub_page::page_len so that we can know
> >> where the utilized range ends.
> > 
> > Actually, this should be sectorsize or nodesize? Ie. is it necessary to
> > track the length inside scrub_page at all? It might make sense for
> > convenience though.
> > 
> In the end, no need to track page_len for current implement.
> It's always sector size.
> 
> But that conflicts with the name "scrub_page", making it more
> "scrub_sector".

Yeah, that would have to be updated as well, and 'sector' is actually
what we want to use.

> Anyway, I need to update the scrub support patchset to follow the one
> sector one scrub_page policy.

I've added the following to misc-next; the other patches depend on the
->page_len patch:

btrfs: scrub: refactor scrub_find_csum()
btrfs: scrub: remove the force parameter of scrub_pages
btrfs: scrub: distinguish scrub page from regular page

Patch

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index e4f73dfc3516..9f380009890f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -72,9 +72,15 @@  struct scrub_page {
 	u64			physical_for_dev_replace;
 	atomic_t		refs;
 	struct {
-		unsigned int	mirror_num:8;
-		unsigned int	have_csum:1;
-		unsigned int	io_error:1;
+		/*
+		 * For the subpage case, only part of the page is utilized.
+		 * Note that 16 bits can only go up to 65535, not 65536, thus
+		 * we have to use 17 bits here.
+		 */
+		u32	page_len:17;
+		u32	mirror_num:8;
+		u32	have_csum:1;
+		u32	io_error:1;
 	};
 	struct scrub_recover	*recover;
 
@@ -216,6 +222,11 @@  static struct scrub_page *alloc_scrub_page(struct scrub_ctx *sctx, gfp_t mask)
 	u32 sectorsize = sctx->fs_info->sectorsize;
 	size_t size;
 
+	/*
+	 * The bits in scrub_page::page_len only support up to 64K page size.
+	 */
+	BUILD_BUG_ON(PAGE_SIZE > SZ_64K);
+
 	/* No support for multi-page sector size yet */
 	ASSERT(PAGE_SIZE >= sectorsize && IS_ALIGNED(PAGE_SIZE, sectorsize));
 
@@ -1357,6 +1368,7 @@  static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 			}
 			scrub_page_get(spage);
 			sblock->pagev[page_index] = spage;
+			spage->page_len = sublen;
 			spage->sblock = sblock;
 			spage->flags = flags;
 			spage->generation = generation;
@@ -1450,7 +1462,7 @@  static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
 		struct scrub_page *spage = sblock->pagev[page_num];
 
 		WARN_ON(!spage->page);
-		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
+		bio_add_page(bio, spage->page, spage->page_len, 0);
 	}
 
 	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
@@ -1503,7 +1515,7 @@  static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 		bio = btrfs_io_bio_alloc(1);
 		bio_set_dev(bio, spage->dev->bdev);
 
-		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
+		bio_add_page(bio, spage->page, spage->page_len, 0);
 		bio->bi_iter.bi_sector = spage->physical >> 9;
 		bio->bi_opf = REQ_OP_READ;
 
@@ -1586,8 +1598,8 @@  static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 		bio->bi_iter.bi_sector = spage_bad->physical >> 9;
 		bio->bi_opf = REQ_OP_WRITE;
 
-		ret = bio_add_page(bio, spage_good->page, PAGE_SIZE, 0);
-		if (PAGE_SIZE != ret) {
+		ret = bio_add_page(bio, spage_good->page, spage_good->page_len, 0);
+		if (ret != spage_good->page_len) {
 			bio_put(bio);
 			return -EIO;
 		}
@@ -1683,8 +1695,8 @@  static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 		goto again;
 	}
 
-	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
-	if (ret != PAGE_SIZE) {
+	ret = bio_add_page(sbio->bio, spage->page, spage->page_len, 0);
+	if (ret != spage->page_len) {
 		if (sbio->page_count < 1) {
 			bio_put(sbio->bio);
 			sbio->bio = NULL;
@@ -2031,8 +2043,8 @@  static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
 	}
 
 	sbio->pagev[sbio->page_count] = spage;
-	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
-	if (ret != PAGE_SIZE) {
+	ret = bio_add_page(sbio->bio, spage->page, spage->page_len, 0);
+	if (ret != spage->page_len) {
 		if (sbio->page_count < 1) {
 			bio_put(sbio->bio);
 			sbio->bio = NULL;
@@ -2208,6 +2220,7 @@  static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
 		scrub_page_get(spage);
 		sblock->pagev[index] = spage;
+		spage->page_len = l;
 		spage->sblock = sblock;
 		spage->dev = dev;
 		spage->flags = flags;
@@ -2552,6 +2565,7 @@  static int scrub_pages_for_parity(struct scrub_parity *sparity,
 		/* For scrub parity */
 		scrub_page_get(spage);
 		list_add_tail(&spage->list, &sparity->spages);
+		spage->page_len = l;
 		spage->sblock = sblock;
 		spage->dev = dev;
 		spage->flags = flags;