diff mbox series

[v2,03/11] btrfs: introduce new members for extent_map

Message ID f0be8547f0df8d8a4578c5e4d9b560c053dab8db.1714707707.git.wqu@suse.com (mailing list archive)
State New
Headers show
Series btrfs: extent-map: unify the members with btrfs_ordered_extent | expand

Commit Message

Qu Wenruo May 3, 2024, 6:01 a.m. UTC
Introduce two new members for extent_map:

- disk_bytenr
- offset

Both are matching the members with the same name inside
btrfs_file_extent_items.

For now this patch only touches those members when:

- Reading btrfs_file_extent_items from disk
- Inserting new holes
- Merging two extent maps
  With the new disk_bytenr and disk_num_bytes, doing merging would be a
  little complex, as we have 3 different cases:

  * Both extent maps are referring to the same data extent
  * Both extent maps are referring to different data extents, but
    those data extents are adjacent, and extent maps are at head/tail
    of each data extents
  * One of the extent map is referring to an merged and larger data
    extent that covers both extent maps

  The 3rd case seems only valid in selftest (test_case_3()), but
  a new helper merge_ondisk_extents() should be able to handle all of
  them.

To properly assign values for those new members, a new btrfs_file_extent
parameter is introduced to all the involved call sites.

- For NOCOW writes the btrfs_file_extent would be exposed from
  can_nocow_file_extent().

- For other writes, the members can be easily calculated
  As most of them have 0 offset and utilizing the whole on-disk data
  extent.
  The exception is encoded write, but thankfully that interface provided
  offset directly and all other needed info.

For now, both the old members (block_start/block_len/orig_start) are
co-existing with the new members (disk_bytenr/offset), meanwhile all the
critical code is still using the old members only.

The cleanup would happen later after all the older and newer members are
properly validated.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/defrag.c     |  4 +++
 fs/btrfs/extent_map.c | 75 +++++++++++++++++++++++++++++++++++++++++--
 fs/btrfs/extent_map.h | 17 ++++++++++
 fs/btrfs/file-item.c  |  9 +++++-
 fs/btrfs/file.c       |  1 +
 fs/btrfs/inode.c      | 60 ++++++++++++++++++++++++++++++----
 6 files changed, 155 insertions(+), 11 deletions(-)

Comments

Filipe Manana May 9, 2024, 5:05 p.m. UTC | #1
On Fri, May 3, 2024 at 7:02 AM Qu Wenruo <wqu@suse.com> wrote:
>
> Introduce two new members for extent_map:
>
> - disk_bytenr
> - offset
>
> Both are matching the members with the same name inside
> btrfs_file_extent_items.
>
> For now this patch only touches those members when:
>
> - Reading btrfs_file_extent_items from disk
> - Inserting new holes
> - Merging two extent maps
>   With the new disk_bytenr and disk_num_bytes, doing merging would be a
>   little complex, as we have 3 different cases:
>
>   * Both extent maps are referring to the same data extent
>   * Both extent maps are referring to different data extents, but
>     those data extents are adjacent, and extent maps are at head/tail
>     of each data extents

I have no idea what this last part of the sentence means:

"and extent maps are at head/tail of each data extents"

>   * One of the extent map is referring to an merged and larger data

map -> maps
an -> a

>     extent that covers both extent maps

Not sure if I can understand this. How can one of the extent maps
already cover the range of the other extent map?
This suggests some overlapping, or most likely I misunderstood what
this is trying to say.

>
>   The 3rd case seems only valid in selftest (test_case_3()), but
>   a new helper merge_ondisk_extents() should be able to handle all of
>   them.
>
> To properly assign values for those new members, a new btrfs_file_extent
> parameter is introduced to all the involved call sites.
>
> - For NOCOW writes the btrfs_file_extent would be exposed from
>   can_nocow_file_extent().
>
> - For other writes, the members can be easily calculated
>   As most of them have 0 offset and utilizing the whole on-disk data
>   extent.
>   The exception is encoded write, but thankfully that interface provided
>   offset directly and all other needed info.
>
> For now, both the old members (block_start/block_len/orig_start) are
> co-existing with the new members (disk_bytenr/offset), meanwhile all the
> critical code is still using the old members only.
>
> The cleanup would happen later after all the older and newer members are
> properly validated.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/defrag.c     |  4 +++
>  fs/btrfs/extent_map.c | 75 +++++++++++++++++++++++++++++++++++++++++--
>  fs/btrfs/extent_map.h | 17 ++++++++++
>  fs/btrfs/file-item.c  |  9 +++++-
>  fs/btrfs/file.c       |  1 +
>  fs/btrfs/inode.c      | 60 ++++++++++++++++++++++++++++++----
>  6 files changed, 155 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
> index 407ccec3e57e..242c5469f4ba 100644
> --- a/fs/btrfs/defrag.c
> +++ b/fs/btrfs/defrag.c
> @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
>                         em->start = start;
>                         em->orig_start = start;
>                         em->block_start = EXTENT_MAP_HOLE;
> +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> +                       em->disk_num_bytes = 0;
> +                       em->ram_bytes = 0;
> +                       em->offset = 0;
>                         em->len = key.offset - start;
>                         break;
>                 }
> diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
> index 4230dd0f34cc..4d4ac9fc43e2 100644
> --- a/fs/btrfs/extent_map.c
> +++ b/fs/btrfs/extent_map.c
> @@ -232,6 +232,58 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
>         return next->block_start == prev->block_start;
>  }
>
> +/*
> + * Handle the ondisk data extents merge for @prev and @next.
> + *
> + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
> + * For now only uncompressed regular extent can be merged.
> + *
> + * @prev and @next will be both updated to point to the new merged range.
> + * Thus one of them should be removed by the caller.
> + */
> +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
> +{
> +       u64 new_disk_bytenr;
> +       u64 new_disk_num_bytes;
> +       u64 new_offset;
> +
> +       /* @prev and @next should not be compressed. */
> +       ASSERT(!extent_map_is_compressed(prev));
> +       ASSERT(!extent_map_is_compressed(next));
> +
> +       /*
> +        * There are several different cases that @prev and @next can be merged.

that -> where

> +        *
> +        * 1) They are referring to the same data extent
> +        * 2) Their ondisk data extents are adjacent and @prev is the tail
> +        *    and @next is the head of their data extents

This thing of head and tail is confusing.
Just say that they are adjacent on disk, it's clear enough and
head/tail suggest some part of an extent only, which isn't the case.

> +        * 3) One of @prev/@next is referrring to a larger merged data extent.

referrring -> referring

> +        *    (test_case_3 of extent maps tests).

If something is only exercised by the self tests but can't happen in
practice, I'd rather change the tests and assert such a case can't
happen.

> +        *
> +        * The calculation here always merge the data extents first, then update
> +        * @offset using the new data extents.
> +        *
> +        * For case 1), the merged data extent would be the same.
> +        * For case 2), we just merge the two data extents into one.
> +        * For case 3), we just got the larger data extent.
> +        */
> +       new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
> +       new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
> +                                next->disk_bytenr + next->disk_num_bytes) -
> +                            new_disk_bytenr;

So this is confusing, disk_num_bytes being a max between the two
extent maps and not their sum.
I gather this is modelled after what we already do at
btrfs_drop_extent_map_range() when splitting.

But the truth is we never used the disk_num_bytes of an extent map
that was merged - we also didn't do it before this patch, for that
reason.
It's only used for logging new extents - which can't be merged - they
can be merged only after being logged.

We can set this to the sum, or leave with some value to signal it's invalid.
Just leave a comment saying disk_num_bytes it's not used anywhere for
merged extent maps.

In the splitting case at btrfs_drop_extent_map_range() it's what we
need since in the case the extent is new and not logged (in the
modified list), disk_num_bytes always represents the size of the
original, before split, extent.

> +       new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
> +
> +       prev->disk_bytenr = new_disk_bytenr;
> +       prev->disk_num_bytes = new_disk_num_bytes;
> +       prev->ram_bytes = new_disk_num_bytes;
> +       prev->offset = new_offset;
> +
> +       next->disk_bytenr = new_disk_bytenr;
> +       next->disk_num_bytes = new_disk_num_bytes;
> +       next->ram_bytes = new_disk_num_bytes;
> +       next->offset = new_offset;
> +}
> +
>  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>  {
>         struct extent_map_tree *tree = &inode->extent_tree;
> @@ -263,6 +315,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>                         em->block_len += merge->block_len;
>                         em->block_start = merge->block_start;
>                         em->generation = max(em->generation, merge->generation);
> +
> +                       if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> +                               merge_ondisk_extents(merge, em);
>                         em->flags |= EXTENT_FLAG_MERGED;
>
>                         rb_erase_cached(&merge->rb_node, &tree->map);
> @@ -278,6 +333,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>         if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
>                 em->len += merge->len;
>                 em->block_len += merge->block_len;
> +               if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> +                       merge_ondisk_extents(em, merge);
>                 rb_erase_cached(&merge->rb_node, &tree->map);
>                 RB_CLEAR_NODE(&merge->rb_node);
>                 em->generation = max(em->generation, merge->generation);
> @@ -565,6 +622,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
>             !extent_map_is_compressed(em)) {
>                 em->block_start += start_diff;
>                 em->block_len = em->len;
> +               em->offset += start_diff;
>         }
>         return add_extent_mapping(inode, em, 0);
>  }
> @@ -783,14 +841,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                                         split->block_len = em->block_len;
>                                 else
>                                         split->block_len = split->len;
> +                               split->disk_bytenr = em->disk_bytenr;
>                                 split->disk_num_bytes = max(split->block_len,
>                                                             em->disk_num_bytes);
> +                               split->offset = em->offset;
>                                 split->ram_bytes = em->ram_bytes;
>                         } else {
>                                 split->orig_start = split->start;
>                                 split->block_len = 0;
>                                 split->block_start = em->block_start;
> +                               split->disk_bytenr = em->disk_bytenr;
>                                 split->disk_num_bytes = 0;
> +                               split->offset = 0;
>                                 split->ram_bytes = split->len;
>                         }
>
> @@ -811,13 +873,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                         split->start = end;
>                         split->len = em_end - end;
>                         split->block_start = em->block_start;
> +                       split->disk_bytenr = em->disk_bytenr;
>                         split->flags = flags;
>                         split->generation = gen;
>
>                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
>                                 split->disk_num_bytes = max(em->block_len,
>                                                             em->disk_num_bytes);
> -
> +                               split->offset = em->offset + end - em->start;
>                                 split->ram_bytes = em->ram_bytes;
>                                 if (compressed) {
>                                         split->block_len = em->block_len;
> @@ -830,10 +893,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                                         split->orig_start = em->orig_start;
>                                 }
>                         } else {
> +                               split->disk_num_bytes = 0;
> +                               split->offset = 0;
>                                 split->ram_bytes = split->len;
>                                 split->orig_start = split->start;
>                                 split->block_len = 0;
> -                               split->disk_num_bytes = 0;
>                         }
>
>                         if (extent_map_in_tree(em)) {
> @@ -987,6 +1051,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
>         /* First, replace the em with a new extent_map starting from * em->start */
>         split_pre->start = em->start;
>         split_pre->len = pre;
> +       split_pre->disk_bytenr = new_logical;
> +       split_pre->disk_num_bytes = split_pre->len;
> +       split_pre->offset = 0;
>         split_pre->orig_start = split_pre->start;
>         split_pre->block_start = new_logical;
>         split_pre->block_len = split_pre->len;
> @@ -1005,10 +1072,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
>         /* Insert the middle extent_map. */
>         split_mid->start = em->start + pre;
>         split_mid->len = em->len - pre;
> +       split_mid->disk_bytenr = em->block_start + pre;
> +       split_mid->disk_num_bytes = split_mid->len;
> +       split_mid->offset = 0;
>         split_mid->orig_start = split_mid->start;
>         split_mid->block_start = em->block_start + pre;
>         split_mid->block_len = split_mid->len;
> -       split_mid->disk_num_bytes = split_mid->block_len;
>         split_mid->ram_bytes = split_mid->len;
>         split_mid->flags = flags;
>         split_mid->generation = em->generation;
> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
> index 6ea0287b0d61..cc9c8092b704 100644
> --- a/fs/btrfs/extent_map.h
> +++ b/fs/btrfs/extent_map.h
> @@ -70,12 +70,29 @@ struct extent_map {
>          */
>         u64 orig_start;
>
> +       /*
> +        * The bytenr for of the full on-disk extent.

for of -> of

> +        *
> +        * For regular extents it's btrfs_file_extent_item::disk_bytenr.
> +        * For holes it's EXTENT_MAP_HOLE and for inline extents it's
> +        * EXTENT_MAP_INLINE.
> +        */
> +       u64 disk_bytenr;
> +
>         /*
>          * The full on-disk extent length, matching
>          * btrfs_file_extent_item::disk_num_bytes.
>          */
>         u64 disk_num_bytes;
>
> +       /*
> +        * Offset inside the decompressed extent.
> +        *
> +        * For regular extents it's btrfs_file_extent_item::offset.
> +        * For holes and inline extents it's 0.
> +        */
> +       u64 offset;
> +
>         /*
>          * The decompressed size of the whole on-disk extent, matching
>          * btrfs_file_extent_item::ram_bytes.
> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> index 2197cfe5443b..47bd4fe0a44b 100644
> --- a/fs/btrfs/file-item.c
> +++ b/fs/btrfs/file-item.c
> @@ -1294,12 +1294,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>                 em->len = btrfs_file_extent_end(path) - extent_start;
>                 em->orig_start = extent_start -
>                         btrfs_file_extent_offset(leaf, fi);
> -               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
>                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
>                 if (bytenr == 0) {
>                         em->block_start = EXTENT_MAP_HOLE;
> +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> +                       em->disk_num_bytes = 0;
> +                       em->offset = 0;
>                         return;
>                 }
> +               em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
> +               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
> +               em->offset = btrfs_file_extent_offset(leaf, fi);
>                 if (compress_type != BTRFS_COMPRESS_NONE) {
>                         extent_map_set_compression(em, compress_type);
>                         em->block_start = bytenr;
> @@ -1316,8 +1321,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>                 ASSERT(extent_start == 0);
>
>                 em->block_start = EXTENT_MAP_INLINE;
> +               em->disk_bytenr = EXTENT_MAP_INLINE;
>                 em->start = 0;
>                 em->len = fs_info->sectorsize;
> +               em->offset = 0;
>                 /*
>                  * Initialize orig_start and block_len with the same values
>                  * as in inode.c:btrfs_get_extent().
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 63a13a4cace0..8931eeee199d 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2337,6 +2337,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
>                 hole_em->orig_start = offset;
>
>                 hole_em->block_start = EXTENT_MAP_HOLE;
> +               hole_em->disk_bytenr = EXTENT_MAP_HOLE;
>                 hole_em->block_len = 0;
>                 hole_em->disk_num_bytes = 0;
>                 hole_em->generation = trans->transid;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 2815b72f2d85..42fea12d509f 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -139,8 +139,9 @@ static noinline int run_delalloc_cow(struct btrfs_inode *inode,
>                                      bool pages_dirty);
>  static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>                                        u64 len, u64 orig_start, u64 block_start,
> -                                      u64 block_len, u64 orig_block_len,
> +                                      u64 block_len, u64 disk_num_bytes,
>                                        u64 ram_bytes, int compress_type,
> +                                      struct btrfs_file_extent *file_extent,
>                                        int type);
>
>  static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
> @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         struct btrfs_root *root = inode->root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
>         struct btrfs_ordered_extent *ordered;
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct btrfs_key ins;
>         struct page *locked_page = NULL;
>         struct extent_state *cached = NULL;
> @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         lock_extent(io_tree, start, end, &cached);
>
>         /* Here we're doing allocation and writeback of the compressed pages */
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.ram_bytes = async_extent->ram_size;
> +       file_extent.num_bytes = async_extent->ram_size;
> +       file_extent.offset = 0;
> +       file_extent.compression = async_extent->compress_type;
> +
>         em = create_io_em(inode, start,
>                           async_extent->ram_size,       /* len */
>                           start,                        /* orig_start */
> @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>                           ins.offset,                   /* orig_block_len */
>                           async_extent->ram_size,       /* ram_bytes */
>                           async_extent->compress_type,
> +                         &file_extent,
>                           BTRFS_ORDERED_COMPRESSED);
>         if (IS_ERR(em)) {
>                 ret = PTR_ERR(em);
> @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>
>         while (num_bytes > 0) {
>                 struct btrfs_ordered_extent *ordered;
> +               struct btrfs_file_extent file_extent = { 0 };

Unnecessary initialization, see below.

>
>                 cur_alloc_size = num_bytes;
>                 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
> @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                 extent_reserved = true;
>
>                 ram_size = ins.offset;
> +               file_extent.disk_bytenr = ins.objectid;
> +               file_extent.disk_num_bytes = ins.offset;
> +               file_extent.num_bytes = ins.offset;
> +               file_extent.ram_bytes = ins.offset;
> +               file_extent.offset = 0;
> +               file_extent.compression = BTRFS_COMPRESS_NONE;

If we always have to initialize all the members of the structure, it's
pointless to have it initialized to zeros when we declared it.

>
>                 lock_extent(&inode->io_tree, start, start + ram_size - 1,
>                             &cached);
> @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                                   ins.offset, /* orig_block_len */
>                                   ram_size, /* ram_bytes */
>                                   BTRFS_COMPRESS_NONE, /* compress_type */
> +                                 &file_extent,
>                                   BTRFS_ORDERED_REGULAR /* type */);
>                 if (IS_ERR(em)) {
>                         unlock_extent(&inode->io_tree, start,
> @@ -1855,6 +1873,7 @@ struct can_nocow_file_extent_args {
>         u64 disk_bytenr;
>         u64 disk_num_bytes;
>         u64 extent_offset;
> +

Unrelated white space change.

>         /* Number of bytes that can be written to in NOCOW mode. */
>         u64 num_bytes;
>
> @@ -2182,6 +2201,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
>                                           nocow_args.num_bytes, /* block_len */
>                                           nocow_args.disk_num_bytes, /* orig_block_len */
>                                           ram_bytes, BTRFS_COMPRESS_NONE,
> +                                         &nocow_args.file_extent,
>                                           BTRFS_ORDERED_PREALLOC);
>                         if (IS_ERR(em)) {
>                                 unlock_extent(&inode->io_tree, cur_offset,
> @@ -4982,6 +5002,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
>                         hole_em->orig_start = cur_offset;
>
>                         hole_em->block_start = EXTENT_MAP_HOLE;
> +                       hole_em->disk_bytenr = EXTENT_MAP_HOLE;
>                         hole_em->block_len = 0;
>                         hole_em->disk_num_bytes = 0;
>                         hole_em->ram_bytes = hole_size;
> @@ -6842,6 +6863,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
>         }
>         em->start = EXTENT_MAP_HOLE;
>         em->orig_start = EXTENT_MAP_HOLE;
> +       em->disk_bytenr = EXTENT_MAP_HOLE;
>         em->len = (u64)-1;
>         em->block_len = (u64)-1;
>
> @@ -7007,7 +7029,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
>                                                   const u64 block_len,
>                                                   const u64 orig_block_len,
>                                                   const u64 ram_bytes,
> -                                                 const int type)
> +                                                 const int type,
> +                                                 struct btrfs_file_extent *file_extent)
>  {
>         struct extent_map *em = NULL;
>         struct btrfs_ordered_extent *ordered;
> @@ -7016,7 +7039,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
>                 em = create_io_em(inode, start, len, orig_start, block_start,
>                                   block_len, orig_block_len, ram_bytes,
>                                   BTRFS_COMPRESS_NONE, /* compress_type */
> -                                 type);
> +                                 file_extent, type);
>                 if (IS_ERR(em))
>                         goto out;
>         }
> @@ -7047,6 +7070,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
>  {
>         struct btrfs_root *root = inode->root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct extent_map *em;
>         struct btrfs_key ins;
>         u64 alloc_hint;
> @@ -7065,9 +7089,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
>         if (ret)
>                 return ERR_PTR(ret);
>
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.num_bytes = ins.offset;
> +       file_extent.ram_bytes = ins.offset;
> +       file_extent.offset = 0;
> +       file_extent.compression = BTRFS_COMPRESS_NONE;

Same as before:

"If we always have to initialize all the members of the structure,
it's pointless to have it initialized to zeros when we declared it."


>         em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
>                                      ins.objectid, ins.offset, ins.offset,
> -                                    ins.offset, BTRFS_ORDERED_REGULAR);
> +                                    ins.offset, BTRFS_ORDERED_REGULAR,
> +                                    &file_extent);
>         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
>         if (IS_ERR(em))
>                 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
> @@ -7310,6 +7341,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>                                        u64 len, u64 orig_start, u64 block_start,
>                                        u64 block_len, u64 disk_num_bytes,
>                                        u64 ram_bytes, int compress_type,
> +                                      struct btrfs_file_extent *file_extent,
>                                        int type)
>  {
>         struct extent_map *em;
> @@ -7367,9 +7399,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>         em->len = len;
>         em->block_len = block_len;
>         em->block_start = block_start;
> +       em->disk_bytenr = file_extent->disk_bytenr;
>         em->disk_num_bytes = disk_num_bytes;
>         em->ram_bytes = ram_bytes;
>         em->generation = -1;
> +       em->offset = file_extent->offset;
>         em->flags |= EXTENT_FLAG_PINNED;
>         if (type == BTRFS_ORDERED_COMPRESSED)
>                 extent_map_set_compression(em, compress_type);
> @@ -7393,6 +7427,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>  {
>         const bool nowait = (iomap_flags & IOMAP_NOWAIT);
>         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct extent_map *em = *map;
>         int type;
>         u64 block_start, orig_start, orig_block_len, ram_bytes;
> @@ -7423,7 +7458,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>                 block_start = em->block_start + (start - em->start);
>
>                 if (can_nocow_extent(inode, start, &len, &orig_start,
> -                                    &orig_block_len, &ram_bytes, NULL, false, false) == 1) {
> +                                    &orig_block_len, &ram_bytes,
> +                                    &file_extent, false, false) == 1) {
>                         bg = btrfs_inc_nocow_writers(fs_info, block_start);
>                         if (bg)
>                                 can_nocow = true;
> @@ -7451,7 +7487,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>                 em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
>                                               orig_start, block_start,
>                                               len, orig_block_len,
> -                                             ram_bytes, type);
> +                                             ram_bytes, type,
> +                                             &file_extent);
>                 btrfs_dec_nocow_writers(bg);
>                 if (type == BTRFS_ORDERED_PREALLOC) {
>                         free_extent_map(em);
> @@ -9602,6 +9639,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
>                 em->orig_start = cur_offset;
>                 em->len = ins.offset;
>                 em->block_start = ins.objectid;
> +               em->disk_bytenr = ins.objectid;
> +               em->offset = 0;
>                 em->block_len = ins.offset;
>                 em->disk_num_bytes = ins.offset;
>                 em->ram_bytes = ins.offset;
> @@ -10168,6 +10207,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
>         struct extent_changeset *data_reserved = NULL;
>         struct extent_state *cached_state = NULL;
>         struct btrfs_ordered_extent *ordered;
> +       struct btrfs_file_extent file_extent = { 0 };
>         int compression;
>         size_t orig_count;
>         u64 start, end;
> @@ -10343,10 +10383,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
>                 goto out_delalloc_release;
>         extent_reserved = true;
>
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.num_bytes = num_bytes;
> +       file_extent.ram_bytes = ram_bytes;
> +       file_extent.offset = encoded->unencoded_offset;
> +       file_extent.compression = compression;

Same as before:

"If we always have to initialize all the members of the structure,
it's pointless to have it initialized to zeros when we declared it."

The rest I suppose seems fine, but I have to look at the rest of the patchset.

Thanks.

>         em = create_io_em(inode, start, num_bytes,
>                           start - encoded->unencoded_offset, ins.objectid,
>                           ins.offset, ins.offset, ram_bytes, compression,
> -                         BTRFS_ORDERED_COMPRESSED);
> +                         &file_extent, BTRFS_ORDERED_COMPRESSED);
>         if (IS_ERR(em)) {
>                 ret = PTR_ERR(em);
>                 goto out_free_reserved;
> --
> 2.45.0
>
>
Qu Wenruo May 9, 2024, 10:11 p.m. UTC | #2
在 2024/5/10 02:35, Filipe Manana 写道:
> On Fri, May 3, 2024 at 7:02 AM Qu Wenruo <wqu@suse.com> wrote:
>>
>> Introduce two new members for extent_map:
>>
>> - disk_bytenr
>> - offset
>>
>> Both are matching the members with the same name inside
>> btrfs_file_extent_items.
>>
>> For now this patch only touches those members when:
>>
>> - Reading btrfs_file_extent_items from disk
>> - Inserting new holes
>> - Merging two extent maps
>>    With the new disk_bytenr and disk_num_bytes, doing merging would be a
>>    little complex, as we have 3 different cases:
>>
>>    * Both extent maps are referring to the same data extent
>>    * Both extent maps are referring to different data extents, but
>>      those data extents are adjacent, and extent maps are at head/tail
>>      of each data extents
>
> I have no idea what this last part of the sentence means:
>
> "and extent maps are at head/tail of each data extents"

For this case:

         |<- data extent 1 ->|<- data extent 2 ->|
	|          |////////|////////|          |
                      FE A      FE B

In above case, FE A is only referring the tailing part of data extent 1,
and FE B is referring to the heading part of data extent 2.

In that case, FE A and FE B can be merged into something like this:

         |<------ merged extent 1 + 2 --------->|
	|          |/////////////////|         |
                        FE A + B

In that case, merged file extent would have:

- disk_bytenr = fe_a->disk_bytenr
- disk_num_bytes = fe_a->disk_num_bytes + fe_b->disk_num_bytes
- ram_bytes = fe_a->ram_bytes + fe_b->ram_bytes
- offset = fe_a->offset
- num_bytes = fe_a->num_bytes + fe_b->num_bytes

>
>>    * One of the extent map is referring to an merged and larger data
>
> map -> maps
> an -> a
>
>>      extent that covers both extent maps
>
> Not sure if I can understand this. How can one of the extent maps
> already cover the range of the other extent map?
> This suggests some overlapping, or most likely I misunderstood what
> this is trying to say.

That's the for impossible test case 3, but as you mentioned, it's more
sane just to remove it.

[...]
>
>> +        *
>> +        * The calculation here always merge the data extents first, then update
>> +        * @offset using the new data extents.
>> +        *
>> +        * For case 1), the merged data extent would be the same.
>> +        * For case 2), we just merge the two data extents into one.
>> +        * For case 3), we just got the larger data extent.
>> +        */
>> +       new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
>> +       new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
>> +                                next->disk_bytenr + next->disk_num_bytes) -
>> +                            new_disk_bytenr;
>
> So this is confusing, disk_num_bytes being a max between the two
> extent maps and not their sum.

Check case 1).

Both file extents are referring to the same data extent.

Like this:

FE 1, file pos 0, num_bytes 4K, disk_bytenr X, disk_num_bytes 16K,
offset 0, ram_bytes 16K, compression none

FE 2, file pos 4k, num_bytes 4K, disk_bytenr X, disk_num_bytes 16K,
offset 4k, ram_bytes 16K, compression none.

In that case we should not just sum up the disk_num_bytes at all.
Remember disk_num_bytes are for the data extent.

> I gather this is modelled after what we already do at
> btrfs_drop_extent_map_range() when splitting.
>
> But the truth is we never used the disk_num_bytes of an extent map
> that was merged - we also didn't do it before this patch, for that
> reason.
> It's only used for logging new extents - which can't be merged - they
> can be merged only after being logged.
>
> We can set this to the sum, or leave with some value to signal it's invalid.
> Just leave a comment saying disk_num_bytes it's not used anywhere for
> merged extent maps.
>
> In the splitting case at btrfs_drop_extent_map_range() it's what we
> need since in the case the extent is new and not logged (in the
> modified list), disk_num_bytes always represents the size of the
> original, before split, extent.

[...]

>> +       file_extent.disk_bytenr = ins.objectid;
>> +       file_extent.disk_num_bytes = ins.offset;
>> +       file_extent.num_bytes = ins.offset;
>> +       file_extent.ram_bytes = ins.offset;
>> +       file_extent.offset = 0;
>> +       file_extent.compression = BTRFS_COMPRESS_NONE;
>
> Same as before:
>
> "If we always have to initialize all the members of the structure,
> it's pointless to have it initialized to zeros when we declared it."
>
[...]
>> +       file_extent.disk_bytenr = ins.objectid;
>> +       file_extent.disk_num_bytes = ins.offset;
>> +       file_extent.num_bytes = num_bytes;
>> +       file_extent.ram_bytes = ram_bytes;
>> +       file_extent.offset = encoded->unencoded_offset;
>> +       file_extent.compression = compression;
>
> Same as before:
>
> "If we always have to initialize all the members of the structure,
> it's pointless to have it initialized to zeros when we declared it."

Fair enough, and an uninitilized structure member can also make compiler
to warn us.

Thanks,
Qu
>
> The rest I suppose seems fine, but I have to look at the rest of the patchset.
>
> Thanks.
>
>>          em = create_io_em(inode, start, num_bytes,
>>                            start - encoded->unencoded_offset, ins.objectid,
>>                            ins.offset, ins.offset, ram_bytes, compression,
>> -                         BTRFS_ORDERED_COMPRESSED);
>> +                         &file_extent, BTRFS_ORDERED_COMPRESSED);
>>          if (IS_ERR(em)) {
>>                  ret = PTR_ERR(em);
>>                  goto out_free_reserved;
>> --
>> 2.45.0
>>
>>
>
Filipe Manana May 10, 2024, 11:26 a.m. UTC | #3
On Thu, May 9, 2024 at 11:12 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> 在 2024/5/10 02:35, Filipe Manana 写道:
> > On Fri, May 3, 2024 at 7:02 AM Qu Wenruo <wqu@suse.com> wrote:
> >>
> >> Introduce two new members for extent_map:
> >>
> >> - disk_bytenr
> >> - offset
> >>
> >> Both are matching the members with the same name inside
> >> btrfs_file_extent_items.
> >>
> >> For now this patch only touches those members when:
> >>
> >> - Reading btrfs_file_extent_items from disk
> >> - Inserting new holes
> >> - Merging two extent maps
> >>    With the new disk_bytenr and disk_num_bytes, doing merging would be a
> >>    little complex, as we have 3 different cases:
> >>
> >>    * Both extent maps are referring to the same data extent
> >>    * Both extent maps are referring to different data extents, but
> >>      those data extents are adjacent, and extent maps are at head/tail
> >>      of each data extents
> >
> > I have no idea what this last part of the sentence means:
> >
> > "and extent maps are at head/tail of each data extents"
>
> For this case:
>
>          |<- data extent 1 ->|<- data extent 2 ->|
>         |          |////////|////////|          |
>                       FE A      FE B
>
> In above case, FE A is only referring the tailing part of data extent 1,
> and FE B is referring to the heading part of data extent 2.
>
> In that case, FE A and FE B can be merged into something like this:
>
>          |<------ merged extent 1 + 2 --------->|
>         |          |/////////////////|         |
>                         FE A + B
>
> In that case, merged file extent would have:
>
> - disk_bytenr = fe_a->disk_bytenr
> - disk_num_bytes = fe_a->disk_num_bytes + fe_b->disk_num_bytes
> - ram_bytes = fe_a->ram_bytes + fe_b->ram_bytes
> - offset = fe_a->offset
> - num_bytes = fe_a->num_bytes + fe_b->num_bytes
>
> >
> >>    * One of the extent map is referring to an merged and larger data
> >
> > map -> maps
> > an -> a
> >
> >>      extent that covers both extent maps
> >
> > Not sure if I can understand this. How can one of the extent maps
> > already cover the range of the other extent map?
> > This suggests some overlapping, or most likely I misunderstood what
> > this is trying to say.
>
> That's the for impossible test case 3, but as you mentioned, it's more
> sane just to remove it.
>
> [...]
> >
> >> +        *
> >> +        * The calculation here always merge the data extents first, then update
> >> +        * @offset using the new data extents.
> >> +        *
> >> +        * For case 1), the merged data extent would be the same.
> >> +        * For case 2), we just merge the two data extents into one.
> >> +        * For case 3), we just got the larger data extent.
> >> +        */
> >> +       new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
> >> +       new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
> >> +                                next->disk_bytenr + next->disk_num_bytes) -
> >> +                            new_disk_bytenr;
> >
> > So this is confusing, disk_num_bytes being a max between the two
> > extent maps and not their sum.
>
> Check case 1).
>
> Both file extents are referring to the same data extent.
>
> Like this:
>
> FE 1, file pos 0, num_bytes 4K, disk_bytenr X, disk_num_bytes 16K,
> offset 0, ram_bytes 16K, compression none
>
> FE 2, file pos 4k, num_bytes 4K, disk_bytenr X, disk_num_bytes 16K,
> offset 4k, ram_bytes 16K, compression none.
>
> In that case we should not just sum up the disk_num_bytes at all.
> Remember disk_num_bytes are for the data extent.

Yes, but for cases where they refer to different file extent items
that are contiguous on disk, the max is confusing, and a sum is what
makes sense. Example:

FE 1, file pos 0, num_bytes 4K, disk_bytenr X, disk_num_bytes 4K,
offset 0, ram_bytes 4K, no compression

FE 2, file pos 4K, num_bytes 4K, disk_bytenr X + 4K, disk_num_bytes
4K, offset 0, ram_bytes 4K, no compression

When merging the corresponding extent maps it's natural to think
disk_num_bytes is 8K and not 4K.

But as I mentioned before, after merging we really don't use
disk_num_bytes anywhere.
It's only used during extent logging, which only happens for extents
maps that were not merged and can not be before they are logged.

Before this patchset, when disk_num_bytes was named orig_block_len,
that was never updated during merging, exactly because we don't use
cases for it.


>
> > I gather this is modelled after what we already do at
> > btrfs_drop_extent_map_range() when splitting.
> >
> > But the truth is we never used the disk_num_bytes of an extent map
> > that was merged - we also didn't do it before this patch, for that
> > reason.
> > It's only used for logging new extents - which can't be merged - they
> > can be merged only after being logged.
> >
> > We can set this to the sum, or leave with some value to signal it's invalid.
> > Just leave a comment saying disk_num_bytes it's not used anywhere for
> > merged extent maps.
> >
> > In the splitting case at btrfs_drop_extent_map_range() it's what we
> > need since in the case the extent is new and not logged (in the
> > modified list), disk_num_bytes always represents the size of the
> > original, before split, extent.
>
> [...]
>
> >> +       file_extent.disk_bytenr = ins.objectid;
> >> +       file_extent.disk_num_bytes = ins.offset;
> >> +       file_extent.num_bytes = ins.offset;
> >> +       file_extent.ram_bytes = ins.offset;
> >> +       file_extent.offset = 0;
> >> +       file_extent.compression = BTRFS_COMPRESS_NONE;
> >
> > Same as before:
> >
> > "If we always have to initialize all the members of the structure,
> > it's pointless to have it initialized to zeros when we declared it."
> >
> [...]
> >> +       file_extent.disk_bytenr = ins.objectid;
> >> +       file_extent.disk_num_bytes = ins.offset;
> >> +       file_extent.num_bytes = num_bytes;
> >> +       file_extent.ram_bytes = ram_bytes;
> >> +       file_extent.offset = encoded->unencoded_offset;
> >> +       file_extent.compression = compression;
> >
> > Same as before:
> >
> > "If we always have to initialize all the members of the structure,
> > it's pointless to have it initialized to zeros when we declared it."
>
> Fair enough, and an uninitilized structure member can also make compiler
> to warn us.
>
> Thanks,
> Qu
> >
> > The rest I suppose seems fine, but I have to look at the rest of the patchset.
> >
> > Thanks.
> >
> >>          em = create_io_em(inode, start, num_bytes,
> >>                            start - encoded->unencoded_offset, ins.objectid,
> >>                            ins.offset, ins.offset, ram_bytes, compression,
> >> -                         BTRFS_ORDERED_COMPRESSED);
> >> +                         &file_extent, BTRFS_ORDERED_COMPRESSED);
> >>          if (IS_ERR(em)) {
> >>                  ret = PTR_ERR(em);
> >>                  goto out_free_reserved;
> >> --
> >> 2.45.0
> >>
> >>
> >
Qu Wenruo May 10, 2024, 10:26 p.m. UTC | #4
在 2024/5/10 20:56, Filipe Manana 写道:
[...]
>>
>> Check case 1).
>>
>> Both file extents are referring to the same data extent.
>>
>> Like this:
>>
>> FE 1, file pos 0, num_bytes 4K, disk_bytenr X, disk_num_bytes 16K,
>> offset 0, ram_bytes 16K, compression none
>>
>> FE 2, file pos 4k, num_bytes 4K, disk_bytenr X, disk_num_bytes 16K,
>> offset 4k, ram_bytes 16K, compression none.
>>
>> In that case we should not just sum up the disk_num_bytes at all.
>> Remember disk_num_bytes are for the data extent.
> 
> Yes, but for cases where they refer to different file extent items
> that are contiguous on disk, the max is confusing, and a sum is what
> makes sense. Example:
> 
> FE 1, file pos 0, num_bytes 4K, disk_bytenr X, disk_num_bytes 4K,
> offset 0, ram_bytes 4K, no compression
> 
> FE 2, file pos 4K, num_bytes 4K, disk_bytenr X + 4K, disk_num_bytes
> 4K, offset 0, ram_bytes 4K, no compression
> 
> When merging the corresponding extent maps it's natural to think
> disk_num_bytes is 8K and not 4K.

The max based solution is based on their end bytenr, not disk_num_bytes.

So the merged one would have:

disk_bytenr = min(X, X + 4K) = X
disk_num_bytes = max(X + 4K, X + 4K + 4K) - disk_bytenr = 8K.

So I didn't see why it's not natural.

Furthermore, the max/min based solution can handle both case 1) (same 
data extent, different parts) and case 2) (different data extents) well.

> 
> But as I mentioned before, after merging we really don't use
> disk_num_bytes anywhere.
> It's only used during extent logging, which only happens for extents
> maps that were not merged and can not be before they are logged.
> 
> Before this patchset, when disk_num_bytes was named orig_block_len,
> that was never updated during merging, exactly because we don't use
> cases for it.

But since we want everything to match the definition of the same-named 
member, no matter if they get merged or not, we need to change the values.

If they do not get used, that's still fine.

Thanks,
Qu
Filipe Manana May 13, 2024, 12:48 p.m. UTC | #5
On Fri, May 3, 2024 at 7:02 AM Qu Wenruo <wqu@suse.com> wrote:
>
> Introduce two new members for extent_map:
>
> - disk_bytenr
> - offset
>
> Both are matching the members with the same name inside
> btrfs_file_extent_items.
>
> For now this patch only touches those members when:
>
> - Reading btrfs_file_extent_items from disk
> - Inserting new holes
> - Merging two extent maps
>   With the new disk_bytenr and disk_num_bytes, doing merging would be a
>   little complex, as we have 3 different cases:
>
>   * Both extent maps are referring to the same data extent
>   * Both extent maps are referring to different data extents, but
>     those data extents are adjacent, and extent maps are at head/tail
>     of each data extents
>   * One of the extent map is referring to an merged and larger data
>     extent that covers both extent maps
>
>   The 3rd case seems only valid in selftest (test_case_3()), but
>   a new helper merge_ondisk_extents() should be able to handle all of
>   them.
>
> To properly assign values for those new members, a new btrfs_file_extent
> parameter is introduced to all the involved call sites.
>
> - For NOCOW writes the btrfs_file_extent would be exposed from
>   can_nocow_file_extent().
>
> - For other writes, the members can be easily calculated
>   As most of them have 0 offset and utilizing the whole on-disk data
>   extent.
>   The exception is encoded write, but thankfully that interface provided
>   offset directly and all other needed info.
>
> For now, both the old members (block_start/block_len/orig_start) are
> co-existing with the new members (disk_bytenr/offset), meanwhile all the
> critical code is still using the old members only.
>
> The cleanup would happen later after all the older and newer members are
> properly validated.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/defrag.c     |  4 +++
>  fs/btrfs/extent_map.c | 75 +++++++++++++++++++++++++++++++++++++++++--
>  fs/btrfs/extent_map.h | 17 ++++++++++
>  fs/btrfs/file-item.c  |  9 +++++-
>  fs/btrfs/file.c       |  1 +
>  fs/btrfs/inode.c      | 60 ++++++++++++++++++++++++++++++----
>  6 files changed, 155 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
> index 407ccec3e57e..242c5469f4ba 100644
> --- a/fs/btrfs/defrag.c
> +++ b/fs/btrfs/defrag.c
> @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
>                         em->start = start;
>                         em->orig_start = start;
>                         em->block_start = EXTENT_MAP_HOLE;
> +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> +                       em->disk_num_bytes = 0;
> +                       em->ram_bytes = 0;
> +                       em->offset = 0;
>                         em->len = key.offset - start;
>                         break;
>                 }
> diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
> index 4230dd0f34cc..4d4ac9fc43e2 100644
> --- a/fs/btrfs/extent_map.c
> +++ b/fs/btrfs/extent_map.c
> @@ -232,6 +232,58 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
>         return next->block_start == prev->block_start;
>  }
>
> +/*
> + * Handle the ondisk data extents merge for @prev and @next.
> + *
> + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
> + * For now only uncompressed regular extent can be merged.
> + *
> + * @prev and @next will be both updated to point to the new merged range.
> + * Thus one of them should be removed by the caller.
> + */
> +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
> +{
> +       u64 new_disk_bytenr;
> +       u64 new_disk_num_bytes;
> +       u64 new_offset;
> +
> +       /* @prev and @next should not be compressed. */
> +       ASSERT(!extent_map_is_compressed(prev));
> +       ASSERT(!extent_map_is_compressed(next));
> +
> +       /*
> +        * There are several different cases that @prev and @next can be merged.
> +        *
> +        * 1) They are referring to the same data extent
> +        * 2) Their ondisk data extents are adjacent and @prev is the tail
> +        *    and @next is the head of their data extents
> +        * 3) One of @prev/@next is referrring to a larger merged data extent.
> +        *    (test_case_3 of extent maps tests).
> +        *
> +        * The calculation here always merge the data extents first, then update
> +        * @offset using the new data extents.
> +        *
> +        * For case 1), the merged data extent would be the same.
> +        * For case 2), we just merge the two data extents into one.
> +        * For case 3), we just got the larger data extent.
> +        */
> +       new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
> +       new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
> +                                next->disk_bytenr + next->disk_num_bytes) -
> +                            new_disk_bytenr;
> +       new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
> +
> +       prev->disk_bytenr = new_disk_bytenr;
> +       prev->disk_num_bytes = new_disk_num_bytes;
> +       prev->ram_bytes = new_disk_num_bytes;
> +       prev->offset = new_offset;
> +
> +       next->disk_bytenr = new_disk_bytenr;
> +       next->disk_num_bytes = new_disk_num_bytes;
> +       next->ram_bytes = new_disk_num_bytes;
> +       next->offset = new_offset;
> +}
> +
>  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>  {
>         struct extent_map_tree *tree = &inode->extent_tree;
> @@ -263,6 +315,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>                         em->block_len += merge->block_len;
>                         em->block_start = merge->block_start;
>                         em->generation = max(em->generation, merge->generation);
> +
> +                       if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> +                               merge_ondisk_extents(merge, em);
>                         em->flags |= EXTENT_FLAG_MERGED;
>
>                         rb_erase_cached(&merge->rb_node, &tree->map);
> @@ -278,6 +333,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>         if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
>                 em->len += merge->len;
>                 em->block_len += merge->block_len;
> +               if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> +                       merge_ondisk_extents(em, merge);
>                 rb_erase_cached(&merge->rb_node, &tree->map);
>                 RB_CLEAR_NODE(&merge->rb_node);
>                 em->generation = max(em->generation, merge->generation);
> @@ -565,6 +622,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
>             !extent_map_is_compressed(em)) {
>                 em->block_start += start_diff;
>                 em->block_len = em->len;
> +               em->offset += start_diff;
>         }
>         return add_extent_mapping(inode, em, 0);
>  }
> @@ -783,14 +841,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                                         split->block_len = em->block_len;
>                                 else
>                                         split->block_len = split->len;
> +                               split->disk_bytenr = em->disk_bytenr;
>                                 split->disk_num_bytes = max(split->block_len,
>                                                             em->disk_num_bytes);
> +                               split->offset = em->offset;
>                                 split->ram_bytes = em->ram_bytes;
>                         } else {
>                                 split->orig_start = split->start;
>                                 split->block_len = 0;
>                                 split->block_start = em->block_start;
> +                               split->disk_bytenr = em->disk_bytenr;
>                                 split->disk_num_bytes = 0;
> +                               split->offset = 0;
>                                 split->ram_bytes = split->len;
>                         }
>
> @@ -811,13 +873,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                         split->start = end;
>                         split->len = em_end - end;
>                         split->block_start = em->block_start;
> +                       split->disk_bytenr = em->disk_bytenr;
>                         split->flags = flags;
>                         split->generation = gen;
>
>                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
>                                 split->disk_num_bytes = max(em->block_len,
>                                                             em->disk_num_bytes);
> -
> +                               split->offset = em->offset + end - em->start;
>                                 split->ram_bytes = em->ram_bytes;
>                                 if (compressed) {
>                                         split->block_len = em->block_len;
> @@ -830,10 +893,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                                         split->orig_start = em->orig_start;
>                                 }
>                         } else {
> +                               split->disk_num_bytes = 0;
> +                               split->offset = 0;
>                                 split->ram_bytes = split->len;
>                                 split->orig_start = split->start;
>                                 split->block_len = 0;
> -                               split->disk_num_bytes = 0;
>                         }
>
>                         if (extent_map_in_tree(em)) {
> @@ -987,6 +1051,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
>         /* First, replace the em with a new extent_map starting from * em->start */
>         split_pre->start = em->start;
>         split_pre->len = pre;
> +       split_pre->disk_bytenr = new_logical;
> +       split_pre->disk_num_bytes = split_pre->len;
> +       split_pre->offset = 0;
>         split_pre->orig_start = split_pre->start;
>         split_pre->block_start = new_logical;
>         split_pre->block_len = split_pre->len;
> @@ -1005,10 +1072,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
>         /* Insert the middle extent_map. */
>         split_mid->start = em->start + pre;
>         split_mid->len = em->len - pre;
> +       split_mid->disk_bytenr = em->block_start + pre;
> +       split_mid->disk_num_bytes = split_mid->len;
> +       split_mid->offset = 0;
>         split_mid->orig_start = split_mid->start;
>         split_mid->block_start = em->block_start + pre;
>         split_mid->block_len = split_mid->len;
> -       split_mid->disk_num_bytes = split_mid->block_len;
>         split_mid->ram_bytes = split_mid->len;
>         split_mid->flags = flags;
>         split_mid->generation = em->generation;
> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
> index 6ea0287b0d61..cc9c8092b704 100644
> --- a/fs/btrfs/extent_map.h
> +++ b/fs/btrfs/extent_map.h
> @@ -70,12 +70,29 @@ struct extent_map {
>          */
>         u64 orig_start;
>
> +       /*
> +        * The bytenr for of the full on-disk extent.
> +        *
> +        * For regular extents it's btrfs_file_extent_item::disk_bytenr.
> +        * For holes it's EXTENT_MAP_HOLE and for inline extents it's
> +        * EXTENT_MAP_INLINE.
> +        */
> +       u64 disk_bytenr;
> +
>         /*
>          * The full on-disk extent length, matching
>          * btrfs_file_extent_item::disk_num_bytes.
>          */
>         u64 disk_num_bytes;
>
> +       /*
> +        * Offset inside the decompressed extent.
> +        *
> +        * For regular extents it's btrfs_file_extent_item::offset.
> +        * For holes and inline extents it's 0.
> +        */
> +       u64 offset;
> +
>         /*
>          * The decompressed size of the whole on-disk extent, matching
>          * btrfs_file_extent_item::ram_bytes.
> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> index 2197cfe5443b..47bd4fe0a44b 100644
> --- a/fs/btrfs/file-item.c
> +++ b/fs/btrfs/file-item.c
> @@ -1294,12 +1294,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>                 em->len = btrfs_file_extent_end(path) - extent_start;
>                 em->orig_start = extent_start -
>                         btrfs_file_extent_offset(leaf, fi);
> -               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
>                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
>                 if (bytenr == 0) {
>                         em->block_start = EXTENT_MAP_HOLE;
> +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> +                       em->disk_num_bytes = 0;
> +                       em->offset = 0;
>                         return;
>                 }
> +               em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
> +               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
> +               em->offset = btrfs_file_extent_offset(leaf, fi);
>                 if (compress_type != BTRFS_COMPRESS_NONE) {
>                         extent_map_set_compression(em, compress_type);
>                         em->block_start = bytenr;
> @@ -1316,8 +1321,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>                 ASSERT(extent_start == 0);
>
>                 em->block_start = EXTENT_MAP_INLINE;
> +               em->disk_bytenr = EXTENT_MAP_INLINE;
>                 em->start = 0;
>                 em->len = fs_info->sectorsize;
> +               em->offset = 0;
>                 /*
>                  * Initialize orig_start and block_len with the same values
>                  * as in inode.c:btrfs_get_extent().
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 63a13a4cace0..8931eeee199d 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2337,6 +2337,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
>                 hole_em->orig_start = offset;
>
>                 hole_em->block_start = EXTENT_MAP_HOLE;
> +               hole_em->disk_bytenr = EXTENT_MAP_HOLE;
>                 hole_em->block_len = 0;
>                 hole_em->disk_num_bytes = 0;
>                 hole_em->generation = trans->transid;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 2815b72f2d85..42fea12d509f 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -139,8 +139,9 @@ static noinline int run_delalloc_cow(struct btrfs_inode *inode,
>                                      bool pages_dirty);
>  static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>                                        u64 len, u64 orig_start, u64 block_start,
> -                                      u64 block_len, u64 orig_block_len,
> +                                      u64 block_len, u64 disk_num_bytes,

This hunk should have been in the previous patch, which renames
orig_block_len to disk_num_bytes everywhere, including the prototype
declaration of this function.

>                                        u64 ram_bytes, int compress_type,
> +                                      struct btrfs_file_extent *file_extent,
>                                        int type);
>
>  static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
> @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         struct btrfs_root *root = inode->root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
>         struct btrfs_ordered_extent *ordered;
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct btrfs_key ins;
>         struct page *locked_page = NULL;
>         struct extent_state *cached = NULL;
> @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         lock_extent(io_tree, start, end, &cached);
>
>         /* Here we're doing allocation and writeback of the compressed pages */
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.ram_bytes = async_extent->ram_size;
> +       file_extent.num_bytes = async_extent->ram_size;
> +       file_extent.offset = 0;
> +       file_extent.compression = async_extent->compress_type;
> +
>         em = create_io_em(inode, start,
>                           async_extent->ram_size,       /* len */
>                           start,                        /* orig_start */
> @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>                           ins.offset,                   /* orig_block_len */
>                           async_extent->ram_size,       /* ram_bytes */
>                           async_extent->compress_type,
> +                         &file_extent,
>                           BTRFS_ORDERED_COMPRESSED);
>         if (IS_ERR(em)) {
>                 ret = PTR_ERR(em);
> @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>
>         while (num_bytes > 0) {
>                 struct btrfs_ordered_extent *ordered;
> +               struct btrfs_file_extent file_extent = { 0 };
>
>                 cur_alloc_size = num_bytes;
>                 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
> @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                 extent_reserved = true;
>
>                 ram_size = ins.offset;
> +               file_extent.disk_bytenr = ins.objectid;
> +               file_extent.disk_num_bytes = ins.offset;
> +               file_extent.num_bytes = ins.offset;
> +               file_extent.ram_bytes = ins.offset;
> +               file_extent.offset = 0;
> +               file_extent.compression = BTRFS_COMPRESS_NONE;
>
>                 lock_extent(&inode->io_tree, start, start + ram_size - 1,
>                             &cached);
> @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                                   ins.offset, /* orig_block_len */
>                                   ram_size, /* ram_bytes */
>                                   BTRFS_COMPRESS_NONE, /* compress_type */
> +                                 &file_extent,
>                                   BTRFS_ORDERED_REGULAR /* type */);
>                 if (IS_ERR(em)) {
>                         unlock_extent(&inode->io_tree, start,
> @@ -1855,6 +1873,7 @@ struct can_nocow_file_extent_args {
>         u64 disk_bytenr;
>         u64 disk_num_bytes;
>         u64 extent_offset;
> +
>         /* Number of bytes that can be written to in NOCOW mode. */
>         u64 num_bytes;
>
> @@ -2182,6 +2201,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
>                                           nocow_args.num_bytes, /* block_len */
>                                           nocow_args.disk_num_bytes, /* orig_block_len */
>                                           ram_bytes, BTRFS_COMPRESS_NONE,
> +                                         &nocow_args.file_extent,
>                                           BTRFS_ORDERED_PREALLOC);
>                         if (IS_ERR(em)) {
>                                 unlock_extent(&inode->io_tree, cur_offset,
> @@ -4982,6 +5002,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
>                         hole_em->orig_start = cur_offset;
>
>                         hole_em->block_start = EXTENT_MAP_HOLE;
> +                       hole_em->disk_bytenr = EXTENT_MAP_HOLE;
>                         hole_em->block_len = 0;
>                         hole_em->disk_num_bytes = 0;
>                         hole_em->ram_bytes = hole_size;
> @@ -6842,6 +6863,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
>         }
>         em->start = EXTENT_MAP_HOLE;
>         em->orig_start = EXTENT_MAP_HOLE;
> +       em->disk_bytenr = EXTENT_MAP_HOLE;
>         em->len = (u64)-1;
>         em->block_len = (u64)-1;
>
> @@ -7007,7 +7029,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
>                                                   const u64 block_len,
>                                                   const u64 orig_block_len,
>                                                   const u64 ram_bytes,
> -                                                 const int type)
> +                                                 const int type,
> +                                                 struct btrfs_file_extent *file_extent)
>  {
>         struct extent_map *em = NULL;
>         struct btrfs_ordered_extent *ordered;
> @@ -7016,7 +7039,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
>                 em = create_io_em(inode, start, len, orig_start, block_start,
>                                   block_len, orig_block_len, ram_bytes,
>                                   BTRFS_COMPRESS_NONE, /* compress_type */
> -                                 type);
> +                                 file_extent, type);
>                 if (IS_ERR(em))
>                         goto out;
>         }
> @@ -7047,6 +7070,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
>  {
>         struct btrfs_root *root = inode->root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct extent_map *em;
>         struct btrfs_key ins;
>         u64 alloc_hint;
> @@ -7065,9 +7089,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
>         if (ret)
>                 return ERR_PTR(ret);
>
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.num_bytes = ins.offset;
> +       file_extent.ram_bytes = ins.offset;
> +       file_extent.offset = 0;
> +       file_extent.compression = BTRFS_COMPRESS_NONE;
>         em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
>                                      ins.objectid, ins.offset, ins.offset,
> -                                    ins.offset, BTRFS_ORDERED_REGULAR);
> +                                    ins.offset, BTRFS_ORDERED_REGULAR,
> +                                    &file_extent);
>         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
>         if (IS_ERR(em))
>                 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
> @@ -7310,6 +7341,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>                                        u64 len, u64 orig_start, u64 block_start,
>                                        u64 block_len, u64 disk_num_bytes,
>                                        u64 ram_bytes, int compress_type,
> +                                      struct btrfs_file_extent *file_extent,
>                                        int type)
>  {
>         struct extent_map *em;
> @@ -7367,9 +7399,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>         em->len = len;
>         em->block_len = block_len;
>         em->block_start = block_start;
> +       em->disk_bytenr = file_extent->disk_bytenr;
>         em->disk_num_bytes = disk_num_bytes;
>         em->ram_bytes = ram_bytes;
>         em->generation = -1;
> +       em->offset = file_extent->offset;
>         em->flags |= EXTENT_FLAG_PINNED;
>         if (type == BTRFS_ORDERED_COMPRESSED)
>                 extent_map_set_compression(em, compress_type);
> @@ -7393,6 +7427,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>  {
>         const bool nowait = (iomap_flags & IOMAP_NOWAIT);
>         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct extent_map *em = *map;
>         int type;
>         u64 block_start, orig_start, orig_block_len, ram_bytes;
> @@ -7423,7 +7458,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>                 block_start = em->block_start + (start - em->start);
>
>                 if (can_nocow_extent(inode, start, &len, &orig_start,
> -                                    &orig_block_len, &ram_bytes, NULL, false, false) == 1) {
> +                                    &orig_block_len, &ram_bytes,
> +                                    &file_extent, false, false) == 1) {
>                         bg = btrfs_inc_nocow_writers(fs_info, block_start);
>                         if (bg)
>                                 can_nocow = true;
> @@ -7451,7 +7487,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>                 em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
>                                               orig_start, block_start,
>                                               len, orig_block_len,
> -                                             ram_bytes, type);
> +                                             ram_bytes, type,
> +                                             &file_extent);
>                 btrfs_dec_nocow_writers(bg);
>                 if (type == BTRFS_ORDERED_PREALLOC) {
>                         free_extent_map(em);
> @@ -9602,6 +9639,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
>                 em->orig_start = cur_offset;
>                 em->len = ins.offset;
>                 em->block_start = ins.objectid;
> +               em->disk_bytenr = ins.objectid;
> +               em->offset = 0;
>                 em->block_len = ins.offset;
>                 em->disk_num_bytes = ins.offset;
>                 em->ram_bytes = ins.offset;
> @@ -10168,6 +10207,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
>         struct extent_changeset *data_reserved = NULL;
>         struct extent_state *cached_state = NULL;
>         struct btrfs_ordered_extent *ordered;
> +       struct btrfs_file_extent file_extent = { 0 };
>         int compression;
>         size_t orig_count;
>         u64 start, end;
> @@ -10343,10 +10383,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
>                 goto out_delalloc_release;
>         extent_reserved = true;
>
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.num_bytes = num_bytes;
> +       file_extent.ram_bytes = ram_bytes;
> +       file_extent.offset = encoded->unencoded_offset;
> +       file_extent.compression = compression;
>         em = create_io_em(inode, start, num_bytes,
>                           start - encoded->unencoded_offset, ins.objectid,
>                           ins.offset, ins.offset, ram_bytes, compression,
> -                         BTRFS_ORDERED_COMPRESSED);
> +                         &file_extent, BTRFS_ORDERED_COMPRESSED);
>         if (IS_ERR(em)) {
>                 ret = PTR_ERR(em);
>                 goto out_free_reserved;
> --
> 2.45.0
>
>
Filipe Manana May 13, 2024, 12:54 p.m. UTC | #6
On Mon, May 13, 2024 at 1:48 PM Filipe Manana <fdmanana@kernel.org> wrote:
>
> On Fri, May 3, 2024 at 7:02 AM Qu Wenruo <wqu@suse.com> wrote:
> >
> > Introduce two new members for extent_map:
> >
> > - disk_bytenr
> > - offset
> >
> > Both are matching the members with the same name inside
> > btrfs_file_extent_items.
> >
> > For now this patch only touches those members when:
> >
> > - Reading btrfs_file_extent_items from disk
> > - Inserting new holes
> > - Merging two extent maps
> >   With the new disk_bytenr and disk_num_bytes, doing merging would be a
> >   little complex, as we have 3 different cases:
> >
> >   * Both extent maps are referring to the same data extent
> >   * Both extent maps are referring to different data extents, but
> >     those data extents are adjacent, and extent maps are at head/tail
> >     of each data extents
> >   * One of the extent map is referring to an merged and larger data
> >     extent that covers both extent maps
> >
> >   The 3rd case seems only valid in selftest (test_case_3()), but
> >   a new helper merge_ondisk_extents() should be able to handle all of
> >   them.
> >
> > To properly assign values for those new members, a new btrfs_file_extent
> > parameter is introduced to all the involved call sites.
> >
> > - For NOCOW writes the btrfs_file_extent would be exposed from
> >   can_nocow_file_extent().
> >
> > - For other writes, the members can be easily calculated
> >   As most of them have 0 offset and utilizing the whole on-disk data
> >   extent.
> >   The exception is encoded write, but thankfully that interface provided
> >   offset directly and all other needed info.
> >
> > For now, both the old members (block_start/block_len/orig_start) are
> > co-existing with the new members (disk_bytenr/offset), meanwhile all the
> > critical code is still using the old members only.
> >
> > The cleanup would happen later after all the older and newer members are
> > properly validated.
> >
> > Signed-off-by: Qu Wenruo <wqu@suse.com>
> > Signed-off-by: David Sterba <dsterba@suse.com>
> > ---
> >  fs/btrfs/defrag.c     |  4 +++
> >  fs/btrfs/extent_map.c | 75 +++++++++++++++++++++++++++++++++++++++++--
> >  fs/btrfs/extent_map.h | 17 ++++++++++
> >  fs/btrfs/file-item.c  |  9 +++++-
> >  fs/btrfs/file.c       |  1 +
> >  fs/btrfs/inode.c      | 60 ++++++++++++++++++++++++++++++----
> >  6 files changed, 155 insertions(+), 11 deletions(-)
> >
> > diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
> > index 407ccec3e57e..242c5469f4ba 100644
> > --- a/fs/btrfs/defrag.c
> > +++ b/fs/btrfs/defrag.c
> > @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
> >                         em->start = start;
> >                         em->orig_start = start;
> >                         em->block_start = EXTENT_MAP_HOLE;
> > +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> > +                       em->disk_num_bytes = 0;
> > +                       em->ram_bytes = 0;
> > +                       em->offset = 0;
> >                         em->len = key.offset - start;
> >                         break;
> >                 }
> > diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
> > index 4230dd0f34cc..4d4ac9fc43e2 100644
> > --- a/fs/btrfs/extent_map.c
> > +++ b/fs/btrfs/extent_map.c
> > @@ -232,6 +232,58 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
> >         return next->block_start == prev->block_start;
> >  }
> >
> > +/*
> > + * Handle the ondisk data extents merge for @prev and @next.
> > + *
> > + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
> > + * For now only uncompressed regular extent can be merged.
> > + *
> > + * @prev and @next will be both updated to point to the new merged range.
> > + * Thus one of them should be removed by the caller.
> > + */
> > +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
> > +{
> > +       u64 new_disk_bytenr;
> > +       u64 new_disk_num_bytes;
> > +       u64 new_offset;
> > +
> > +       /* @prev and @next should not be compressed. */
> > +       ASSERT(!extent_map_is_compressed(prev));
> > +       ASSERT(!extent_map_is_compressed(next));
> > +
> > +       /*
> > +        * There are several different cases that @prev and @next can be merged.
> > +        *
> > +        * 1) They are referring to the same data extent
> > +        * 2) Their ondisk data extents are adjacent and @prev is the tail
> > +        *    and @next is the head of their data extents
> > +        * 3) One of @prev/@next is referrring to a larger merged data extent.
> > +        *    (test_case_3 of extent maps tests).
> > +        *
> > +        * The calculation here always merge the data extents first, then update
> > +        * @offset using the new data extents.
> > +        *
> > +        * For case 1), the merged data extent would be the same.
> > +        * For case 2), we just merge the two data extents into one.
> > +        * For case 3), we just got the larger data extent.
> > +        */
> > +       new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
> > +       new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
> > +                                next->disk_bytenr + next->disk_num_bytes) -
> > +                            new_disk_bytenr;
> > +       new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
> > +
> > +       prev->disk_bytenr = new_disk_bytenr;
> > +       prev->disk_num_bytes = new_disk_num_bytes;
> > +       prev->ram_bytes = new_disk_num_bytes;
> > +       prev->offset = new_offset;
> > +
> > +       next->disk_bytenr = new_disk_bytenr;
> > +       next->disk_num_bytes = new_disk_num_bytes;
> > +       next->ram_bytes = new_disk_num_bytes;
> > +       next->offset = new_offset;
> > +}
> > +
> >  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
> >  {
> >         struct extent_map_tree *tree = &inode->extent_tree;
> > @@ -263,6 +315,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
> >                         em->block_len += merge->block_len;
> >                         em->block_start = merge->block_start;
> >                         em->generation = max(em->generation, merge->generation);
> > +
> > +                       if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> > +                               merge_ondisk_extents(merge, em);
> >                         em->flags |= EXTENT_FLAG_MERGED;
> >
> >                         rb_erase_cached(&merge->rb_node, &tree->map);
> > @@ -278,6 +333,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
> >         if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
> >                 em->len += merge->len;
> >                 em->block_len += merge->block_len;
> > +               if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> > +                       merge_ondisk_extents(em, merge);
> >                 rb_erase_cached(&merge->rb_node, &tree->map);
> >                 RB_CLEAR_NODE(&merge->rb_node);
> >                 em->generation = max(em->generation, merge->generation);
> > @@ -565,6 +622,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
> >             !extent_map_is_compressed(em)) {
> >                 em->block_start += start_diff;
> >                 em->block_len = em->len;
> > +               em->offset += start_diff;
> >         }
> >         return add_extent_mapping(inode, em, 0);
> >  }
> > @@ -783,14 +841,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
> >                                         split->block_len = em->block_len;
> >                                 else
> >                                         split->block_len = split->len;
> > +                               split->disk_bytenr = em->disk_bytenr;
> >                                 split->disk_num_bytes = max(split->block_len,
> >                                                             em->disk_num_bytes);
> > +                               split->offset = em->offset;
> >                                 split->ram_bytes = em->ram_bytes;
> >                         } else {
> >                                 split->orig_start = split->start;
> >                                 split->block_len = 0;
> >                                 split->block_start = em->block_start;
> > +                               split->disk_bytenr = em->disk_bytenr;
> >                                 split->disk_num_bytes = 0;
> > +                               split->offset = 0;
> >                                 split->ram_bytes = split->len;
> >                         }
> >
> > @@ -811,13 +873,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
> >                         split->start = end;
> >                         split->len = em_end - end;
> >                         split->block_start = em->block_start;
> > +                       split->disk_bytenr = em->disk_bytenr;
> >                         split->flags = flags;
> >                         split->generation = gen;
> >
> >                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
> >                                 split->disk_num_bytes = max(em->block_len,
> >                                                             em->disk_num_bytes);
> > -
> > +                               split->offset = em->offset + end - em->start;
> >                                 split->ram_bytes = em->ram_bytes;
> >                                 if (compressed) {
> >                                         split->block_len = em->block_len;
> > @@ -830,10 +893,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
> >                                         split->orig_start = em->orig_start;
> >                                 }
> >                         } else {
> > +                               split->disk_num_bytes = 0;
> > +                               split->offset = 0;
> >                                 split->ram_bytes = split->len;
> >                                 split->orig_start = split->start;
> >                                 split->block_len = 0;
> > -                               split->disk_num_bytes = 0;
> >                         }
> >
> >                         if (extent_map_in_tree(em)) {
> > @@ -987,6 +1051,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
> >         /* First, replace the em with a new extent_map starting from * em->start */
> >         split_pre->start = em->start;
> >         split_pre->len = pre;
> > +       split_pre->disk_bytenr = new_logical;
> > +       split_pre->disk_num_bytes = split_pre->len;
> > +       split_pre->offset = 0;
> >         split_pre->orig_start = split_pre->start;
> >         split_pre->block_start = new_logical;
> >         split_pre->block_len = split_pre->len;
> > @@ -1005,10 +1072,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
> >         /* Insert the middle extent_map. */
> >         split_mid->start = em->start + pre;
> >         split_mid->len = em->len - pre;
> > +       split_mid->disk_bytenr = em->block_start + pre;
> > +       split_mid->disk_num_bytes = split_mid->len;
> > +       split_mid->offset = 0;
> >         split_mid->orig_start = split_mid->start;
> >         split_mid->block_start = em->block_start + pre;
> >         split_mid->block_len = split_mid->len;
> > -       split_mid->disk_num_bytes = split_mid->block_len;
> >         split_mid->ram_bytes = split_mid->len;
> >         split_mid->flags = flags;
> >         split_mid->generation = em->generation;
> > diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
> > index 6ea0287b0d61..cc9c8092b704 100644
> > --- a/fs/btrfs/extent_map.h
> > +++ b/fs/btrfs/extent_map.h
> > @@ -70,12 +70,29 @@ struct extent_map {
> >          */
> >         u64 orig_start;
> >
> > +       /*
> > +        * The bytenr for of the full on-disk extent.
> > +        *
> > +        * For regular extents it's btrfs_file_extent_item::disk_bytenr.
> > +        * For holes it's EXTENT_MAP_HOLE and for inline extents it's
> > +        * EXTENT_MAP_INLINE.
> > +        */
> > +       u64 disk_bytenr;
> > +
> >         /*
> >          * The full on-disk extent length, matching
> >          * btrfs_file_extent_item::disk_num_bytes.
> >          */
> >         u64 disk_num_bytes;
> >
> > +       /*
> > +        * Offset inside the decompressed extent.
> > +        *
> > +        * For regular extents it's btrfs_file_extent_item::offset.
> > +        * For holes and inline extents it's 0.
> > +        */
> > +       u64 offset;
> > +
> >         /*
> >          * The decompressed size of the whole on-disk extent, matching
> >          * btrfs_file_extent_item::ram_bytes.
> > diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> > index 2197cfe5443b..47bd4fe0a44b 100644
> > --- a/fs/btrfs/file-item.c
> > +++ b/fs/btrfs/file-item.c
> > @@ -1294,12 +1294,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
> >                 em->len = btrfs_file_extent_end(path) - extent_start;
> >                 em->orig_start = extent_start -
> >                         btrfs_file_extent_offset(leaf, fi);
> > -               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
> >                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
> >                 if (bytenr == 0) {
> >                         em->block_start = EXTENT_MAP_HOLE;
> > +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> > +                       em->disk_num_bytes = 0;
> > +                       em->offset = 0;
> >                         return;
> >                 }
> > +               em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
> > +               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
> > +               em->offset = btrfs_file_extent_offset(leaf, fi);
> >                 if (compress_type != BTRFS_COMPRESS_NONE) {
> >                         extent_map_set_compression(em, compress_type);
> >                         em->block_start = bytenr;
> > @@ -1316,8 +1321,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
> >                 ASSERT(extent_start == 0);
> >
> >                 em->block_start = EXTENT_MAP_INLINE;
> > +               em->disk_bytenr = EXTENT_MAP_INLINE;
> >                 em->start = 0;
> >                 em->len = fs_info->sectorsize;
> > +               em->offset = 0;
> >                 /*
> >                  * Initialize orig_start and block_len with the same values
> >                  * as in inode.c:btrfs_get_extent().
> > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> > index 63a13a4cace0..8931eeee199d 100644
> > --- a/fs/btrfs/file.c
> > +++ b/fs/btrfs/file.c
> > @@ -2337,6 +2337,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
> >                 hole_em->orig_start = offset;
> >
> >                 hole_em->block_start = EXTENT_MAP_HOLE;
> > +               hole_em->disk_bytenr = EXTENT_MAP_HOLE;
> >                 hole_em->block_len = 0;
> >                 hole_em->disk_num_bytes = 0;
> >                 hole_em->generation = trans->transid;
> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> > index 2815b72f2d85..42fea12d509f 100644
> > --- a/fs/btrfs/inode.c
> > +++ b/fs/btrfs/inode.c
> > @@ -139,8 +139,9 @@ static noinline int run_delalloc_cow(struct btrfs_inode *inode,
> >                                      bool pages_dirty);
> >  static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
> >                                        u64 len, u64 orig_start, u64 block_start,
> > -                                      u64 block_len, u64 orig_block_len,
> > +                                      u64 block_len, u64 disk_num_bytes,
>
> This hunk should have been in the previous patch, which renames

In the first patch actually.

> orig_block_len to disk_num_bytes everywhere, including the prototype
> declaration of this function.

And in the declaration of the function, not the prototype.


>
> >                                        u64 ram_bytes, int compress_type,
> > +                                      struct btrfs_file_extent *file_extent,
> >                                        int type);
> >
> >  static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
> > @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
> >         struct btrfs_root *root = inode->root;
> >         struct btrfs_fs_info *fs_info = root->fs_info;
> >         struct btrfs_ordered_extent *ordered;
> > +       struct btrfs_file_extent file_extent = { 0 };
> >         struct btrfs_key ins;
> >         struct page *locked_page = NULL;
> >         struct extent_state *cached = NULL;
> > @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
> >         lock_extent(io_tree, start, end, &cached);
> >
> >         /* Here we're doing allocation and writeback of the compressed pages */
> > +       file_extent.disk_bytenr = ins.objectid;
> > +       file_extent.disk_num_bytes = ins.offset;
> > +       file_extent.ram_bytes = async_extent->ram_size;
> > +       file_extent.num_bytes = async_extent->ram_size;
> > +       file_extent.offset = 0;
> > +       file_extent.compression = async_extent->compress_type;
> > +
> >         em = create_io_em(inode, start,
> >                           async_extent->ram_size,       /* len */
> >                           start,                        /* orig_start */
> > @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
> >                           ins.offset,                   /* orig_block_len */
> >                           async_extent->ram_size,       /* ram_bytes */
> >                           async_extent->compress_type,
> > +                         &file_extent,
> >                           BTRFS_ORDERED_COMPRESSED);
> >         if (IS_ERR(em)) {
> >                 ret = PTR_ERR(em);
> > @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
> >
> >         while (num_bytes > 0) {
> >                 struct btrfs_ordered_extent *ordered;
> > +               struct btrfs_file_extent file_extent = { 0 };
> >
> >                 cur_alloc_size = num_bytes;
> >                 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
> > @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
> >                 extent_reserved = true;
> >
> >                 ram_size = ins.offset;
> > +               file_extent.disk_bytenr = ins.objectid;
> > +               file_extent.disk_num_bytes = ins.offset;
> > +               file_extent.num_bytes = ins.offset;
> > +               file_extent.ram_bytes = ins.offset;
> > +               file_extent.offset = 0;
> > +               file_extent.compression = BTRFS_COMPRESS_NONE;
> >
> >                 lock_extent(&inode->io_tree, start, start + ram_size - 1,
> >                             &cached);
> > @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
> >                                   ins.offset, /* orig_block_len */
> >                                   ram_size, /* ram_bytes */
> >                                   BTRFS_COMPRESS_NONE, /* compress_type */
> > +                                 &file_extent,
> >                                   BTRFS_ORDERED_REGULAR /* type */);
> >                 if (IS_ERR(em)) {
> >                         unlock_extent(&inode->io_tree, start,
> > @@ -1855,6 +1873,7 @@ struct can_nocow_file_extent_args {
> >         u64 disk_bytenr;
> >         u64 disk_num_bytes;
> >         u64 extent_offset;
> > +
> >         /* Number of bytes that can be written to in NOCOW mode. */
> >         u64 num_bytes;
> >
> > @@ -2182,6 +2201,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
> >                                           nocow_args.num_bytes, /* block_len */
> >                                           nocow_args.disk_num_bytes, /* orig_block_len */
> >                                           ram_bytes, BTRFS_COMPRESS_NONE,
> > +                                         &nocow_args.file_extent,
> >                                           BTRFS_ORDERED_PREALLOC);
> >                         if (IS_ERR(em)) {
> >                                 unlock_extent(&inode->io_tree, cur_offset,
> > @@ -4982,6 +5002,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
> >                         hole_em->orig_start = cur_offset;
> >
> >                         hole_em->block_start = EXTENT_MAP_HOLE;
> > +                       hole_em->disk_bytenr = EXTENT_MAP_HOLE;
> >                         hole_em->block_len = 0;
> >                         hole_em->disk_num_bytes = 0;
> >                         hole_em->ram_bytes = hole_size;
> > @@ -6842,6 +6863,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
> >         }
> >         em->start = EXTENT_MAP_HOLE;
> >         em->orig_start = EXTENT_MAP_HOLE;
> > +       em->disk_bytenr = EXTENT_MAP_HOLE;
> >         em->len = (u64)-1;
> >         em->block_len = (u64)-1;
> >
> > @@ -7007,7 +7029,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
> >                                                   const u64 block_len,
> >                                                   const u64 orig_block_len,
> >                                                   const u64 ram_bytes,
> > -                                                 const int type)
> > +                                                 const int type,
> > +                                                 struct btrfs_file_extent *file_extent)
> >  {
> >         struct extent_map *em = NULL;
> >         struct btrfs_ordered_extent *ordered;
> > @@ -7016,7 +7039,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
> >                 em = create_io_em(inode, start, len, orig_start, block_start,
> >                                   block_len, orig_block_len, ram_bytes,
> >                                   BTRFS_COMPRESS_NONE, /* compress_type */
> > -                                 type);
> > +                                 file_extent, type);
> >                 if (IS_ERR(em))
> >                         goto out;
> >         }
> > @@ -7047,6 +7070,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
> >  {
> >         struct btrfs_root *root = inode->root;
> >         struct btrfs_fs_info *fs_info = root->fs_info;
> > +       struct btrfs_file_extent file_extent = { 0 };
> >         struct extent_map *em;
> >         struct btrfs_key ins;
> >         u64 alloc_hint;
> > @@ -7065,9 +7089,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
> >         if (ret)
> >                 return ERR_PTR(ret);
> >
> > +       file_extent.disk_bytenr = ins.objectid;
> > +       file_extent.disk_num_bytes = ins.offset;
> > +       file_extent.num_bytes = ins.offset;
> > +       file_extent.ram_bytes = ins.offset;
> > +       file_extent.offset = 0;
> > +       file_extent.compression = BTRFS_COMPRESS_NONE;
> >         em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
> >                                      ins.objectid, ins.offset, ins.offset,
> > -                                    ins.offset, BTRFS_ORDERED_REGULAR);
> > +                                    ins.offset, BTRFS_ORDERED_REGULAR,
> > +                                    &file_extent);
> >         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
> >         if (IS_ERR(em))
> >                 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
> > @@ -7310,6 +7341,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
> >                                        u64 len, u64 orig_start, u64 block_start,
> >                                        u64 block_len, u64 disk_num_bytes,
> >                                        u64 ram_bytes, int compress_type,
> > +                                      struct btrfs_file_extent *file_extent,
> >                                        int type)
> >  {
> >         struct extent_map *em;
> > @@ -7367,9 +7399,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
> >         em->len = len;
> >         em->block_len = block_len;
> >         em->block_start = block_start;
> > +       em->disk_bytenr = file_extent->disk_bytenr;
> >         em->disk_num_bytes = disk_num_bytes;
> >         em->ram_bytes = ram_bytes;
> >         em->generation = -1;
> > +       em->offset = file_extent->offset;
> >         em->flags |= EXTENT_FLAG_PINNED;
> >         if (type == BTRFS_ORDERED_COMPRESSED)
> >                 extent_map_set_compression(em, compress_type);
> > @@ -7393,6 +7427,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
> >  {
> >         const bool nowait = (iomap_flags & IOMAP_NOWAIT);
> >         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
> > +       struct btrfs_file_extent file_extent = { 0 };
> >         struct extent_map *em = *map;
> >         int type;
> >         u64 block_start, orig_start, orig_block_len, ram_bytes;
> > @@ -7423,7 +7458,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
> >                 block_start = em->block_start + (start - em->start);
> >
> >                 if (can_nocow_extent(inode, start, &len, &orig_start,
> > -                                    &orig_block_len, &ram_bytes, NULL, false, false) == 1) {
> > +                                    &orig_block_len, &ram_bytes,
> > +                                    &file_extent, false, false) == 1) {
> >                         bg = btrfs_inc_nocow_writers(fs_info, block_start);
> >                         if (bg)
> >                                 can_nocow = true;
> > @@ -7451,7 +7487,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
> >                 em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
> >                                               orig_start, block_start,
> >                                               len, orig_block_len,
> > -                                             ram_bytes, type);
> > +                                             ram_bytes, type,
> > +                                             &file_extent);
> >                 btrfs_dec_nocow_writers(bg);
> >                 if (type == BTRFS_ORDERED_PREALLOC) {
> >                         free_extent_map(em);
> > @@ -9602,6 +9639,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
> >                 em->orig_start = cur_offset;
> >                 em->len = ins.offset;
> >                 em->block_start = ins.objectid;
> > +               em->disk_bytenr = ins.objectid;
> > +               em->offset = 0;
> >                 em->block_len = ins.offset;
> >                 em->disk_num_bytes = ins.offset;
> >                 em->ram_bytes = ins.offset;
> > @@ -10168,6 +10207,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
> >         struct extent_changeset *data_reserved = NULL;
> >         struct extent_state *cached_state = NULL;
> >         struct btrfs_ordered_extent *ordered;
> > +       struct btrfs_file_extent file_extent = { 0 };
> >         int compression;
> >         size_t orig_count;
> >         u64 start, end;
> > @@ -10343,10 +10383,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
> >                 goto out_delalloc_release;
> >         extent_reserved = true;
> >
> > +       file_extent.disk_bytenr = ins.objectid;
> > +       file_extent.disk_num_bytes = ins.offset;
> > +       file_extent.num_bytes = num_bytes;
> > +       file_extent.ram_bytes = ram_bytes;
> > +       file_extent.offset = encoded->unencoded_offset;
> > +       file_extent.compression = compression;
> >         em = create_io_em(inode, start, num_bytes,
> >                           start - encoded->unencoded_offset, ins.objectid,
> >                           ins.offset, ins.offset, ram_bytes, compression,
> > -                         BTRFS_ORDERED_COMPRESSED);
> > +                         &file_extent, BTRFS_ORDERED_COMPRESSED);
> >         if (IS_ERR(em)) {
> >                 ret = PTR_ERR(em);
> >                 goto out_free_reserved;
> > --
> > 2.45.0
> >
> >
Filipe Manana May 13, 2024, 5:31 p.m. UTC | #7
On Fri, May 3, 2024 at 7:02 AM Qu Wenruo <wqu@suse.com> wrote:
>
> Introduce two new members for extent_map:
>
> - disk_bytenr
> - offset
>
> Both are matching the members with the same name inside
> btrfs_file_extent_items.
>
> For now this patch only touches those members when:
>
> - Reading btrfs_file_extent_items from disk
> - Inserting new holes
> - Merging two extent maps
>   With the new disk_bytenr and disk_num_bytes, doing merging would be a
>   little complex, as we have 3 different cases:
>
>   * Both extent maps are referring to the same data extent
>   * Both extent maps are referring to different data extents, but
>     those data extents are adjacent, and extent maps are at head/tail
>     of each data extents
>   * One of the extent map is referring to an merged and larger data
>     extent that covers both extent maps
>
>   The 3rd case seems only valid in selftest (test_case_3()), but
>   a new helper merge_ondisk_extents() should be able to handle all of
>   them.
>
> To properly assign values for those new members, a new btrfs_file_extent
> parameter is introduced to all the involved call sites.
>
> - For NOCOW writes the btrfs_file_extent would be exposed from
>   can_nocow_file_extent().
>
> - For other writes, the members can be easily calculated
>   As most of them have 0 offset and utilizing the whole on-disk data
>   extent.
>   The exception is encoded write, but thankfully that interface provided
>   offset directly and all other needed info.
>
> For now, both the old members (block_start/block_len/orig_start) are
> co-existing with the new members (disk_bytenr/offset), meanwhile all the
> critical code is still using the old members only.
>
> The cleanup would happen later after all the older and newer members are
> properly validated.
>
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/defrag.c     |  4 +++
>  fs/btrfs/extent_map.c | 75 +++++++++++++++++++++++++++++++++++++++++--
>  fs/btrfs/extent_map.h | 17 ++++++++++
>  fs/btrfs/file-item.c  |  9 +++++-
>  fs/btrfs/file.c       |  1 +
>  fs/btrfs/inode.c      | 60 ++++++++++++++++++++++++++++++----
>  6 files changed, 155 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
> index 407ccec3e57e..242c5469f4ba 100644
> --- a/fs/btrfs/defrag.c
> +++ b/fs/btrfs/defrag.c
> @@ -709,6 +709,10 @@ static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
>                         em->start = start;
>                         em->orig_start = start;
>                         em->block_start = EXTENT_MAP_HOLE;
> +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> +                       em->disk_num_bytes = 0;
> +                       em->ram_bytes = 0;
> +                       em->offset = 0;
>                         em->len = key.offset - start;
>                         break;
>                 }
> diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
> index 4230dd0f34cc..4d4ac9fc43e2 100644
> --- a/fs/btrfs/extent_map.c
> +++ b/fs/btrfs/extent_map.c
> @@ -232,6 +232,58 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
>         return next->block_start == prev->block_start;
>  }
>
> +/*
> + * Handle the ondisk data extents merge for @prev and @next.
> + *
> + * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
> + * For now only uncompressed regular extent can be merged.
> + *
> + * @prev and @next will be both updated to point to the new merged range.
> + * Thus one of them should be removed by the caller.
> + */
> +static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
> +{
> +       u64 new_disk_bytenr;
> +       u64 new_disk_num_bytes;
> +       u64 new_offset;
> +
> +       /* @prev and @next should not be compressed. */
> +       ASSERT(!extent_map_is_compressed(prev));
> +       ASSERT(!extent_map_is_compressed(next));
> +
> +       /*
> +        * There are several different cases that @prev and @next can be merged.
> +        *
> +        * 1) They are referring to the same data extent
> +        * 2) Their ondisk data extents are adjacent and @prev is the tail
> +        *    and @next is the head of their data extents
> +        * 3) One of @prev/@next is referrring to a larger merged data extent.
> +        *    (test_case_3 of extent maps tests).
> +        *
> +        * The calculation here always merge the data extents first, then update
> +        * @offset using the new data extents.
> +        *
> +        * For case 1), the merged data extent would be the same.
> +        * For case 2), we just merge the two data extents into one.
> +        * For case 3), we just got the larger data extent.
> +        */
> +       new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
> +       new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
> +                                next->disk_bytenr + next->disk_num_bytes) -
> +                            new_disk_bytenr;
> +       new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
> +
> +       prev->disk_bytenr = new_disk_bytenr;
> +       prev->disk_num_bytes = new_disk_num_bytes;
> +       prev->ram_bytes = new_disk_num_bytes;
> +       prev->offset = new_offset;
> +
> +       next->disk_bytenr = new_disk_bytenr;
> +       next->disk_num_bytes = new_disk_num_bytes;
> +       next->ram_bytes = new_disk_num_bytes;
> +       next->offset = new_offset;
> +}
> +
>  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>  {
>         struct extent_map_tree *tree = &inode->extent_tree;
> @@ -263,6 +315,9 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>                         em->block_len += merge->block_len;
>                         em->block_start = merge->block_start;
>                         em->generation = max(em->generation, merge->generation);
> +
> +                       if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> +                               merge_ondisk_extents(merge, em);
>                         em->flags |= EXTENT_FLAG_MERGED;
>
>                         rb_erase_cached(&merge->rb_node, &tree->map);
> @@ -278,6 +333,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
>         if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
>                 em->len += merge->len;
>                 em->block_len += merge->block_len;
> +               if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
> +                       merge_ondisk_extents(em, merge);
>                 rb_erase_cached(&merge->rb_node, &tree->map);
>                 RB_CLEAR_NODE(&merge->rb_node);
>                 em->generation = max(em->generation, merge->generation);
> @@ -565,6 +622,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
>             !extent_map_is_compressed(em)) {
>                 em->block_start += start_diff;
>                 em->block_len = em->len;
> +               em->offset += start_diff;
>         }
>         return add_extent_mapping(inode, em, 0);
>  }
> @@ -783,14 +841,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                                         split->block_len = em->block_len;
>                                 else
>                                         split->block_len = split->len;
> +                               split->disk_bytenr = em->disk_bytenr;
>                                 split->disk_num_bytes = max(split->block_len,
>                                                             em->disk_num_bytes);
> +                               split->offset = em->offset;
>                                 split->ram_bytes = em->ram_bytes;
>                         } else {
>                                 split->orig_start = split->start;
>                                 split->block_len = 0;
>                                 split->block_start = em->block_start;
> +                               split->disk_bytenr = em->disk_bytenr;
>                                 split->disk_num_bytes = 0;
> +                               split->offset = 0;
>                                 split->ram_bytes = split->len;
>                         }
>
> @@ -811,13 +873,14 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                         split->start = end;
>                         split->len = em_end - end;
>                         split->block_start = em->block_start;
> +                       split->disk_bytenr = em->disk_bytenr;
>                         split->flags = flags;
>                         split->generation = gen;
>
>                         if (em->block_start < EXTENT_MAP_LAST_BYTE) {
>                                 split->disk_num_bytes = max(em->block_len,
>                                                             em->disk_num_bytes);
> -
> +                               split->offset = em->offset + end - em->start;
>                                 split->ram_bytes = em->ram_bytes;
>                                 if (compressed) {
>                                         split->block_len = em->block_len;
> @@ -830,10 +893,11 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
>                                         split->orig_start = em->orig_start;
>                                 }
>                         } else {
> +                               split->disk_num_bytes = 0;
> +                               split->offset = 0;
>                                 split->ram_bytes = split->len;
>                                 split->orig_start = split->start;
>                                 split->block_len = 0;
> -                               split->disk_num_bytes = 0;
>                         }
>
>                         if (extent_map_in_tree(em)) {
> @@ -987,6 +1051,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
>         /* First, replace the em with a new extent_map starting from * em->start */
>         split_pre->start = em->start;
>         split_pre->len = pre;
> +       split_pre->disk_bytenr = new_logical;
> +       split_pre->disk_num_bytes = split_pre->len;

So this assignment of ->disk_num_bytes is redundant, because a few
lines below (not shown in the patch diff), there's this:

split_pre->disk_num_bytes = split_pre->block_len;

Which overwrites this statement.


> +       split_pre->offset = 0;
>         split_pre->orig_start = split_pre->start;
>         split_pre->block_start = new_logical;
>         split_pre->block_len = split_pre->len;
> @@ -1005,10 +1072,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
>         /* Insert the middle extent_map. */
>         split_mid->start = em->start + pre;
>         split_mid->len = em->len - pre;
> +       split_mid->disk_bytenr = em->block_start + pre;
> +       split_mid->disk_num_bytes = split_mid->len;
> +       split_mid->offset = 0;
>         split_mid->orig_start = split_mid->start;
>         split_mid->block_start = em->block_start + pre;
>         split_mid->block_len = split_mid->len;
> -       split_mid->disk_num_bytes = split_mid->block_len;
>         split_mid->ram_bytes = split_mid->len;
>         split_mid->flags = flags;
>         split_mid->generation = em->generation;
> diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
> index 6ea0287b0d61..cc9c8092b704 100644
> --- a/fs/btrfs/extent_map.h
> +++ b/fs/btrfs/extent_map.h
> @@ -70,12 +70,29 @@ struct extent_map {
>          */
>         u64 orig_start;
>
> +       /*
> +        * The bytenr for of the full on-disk extent.
> +        *
> +        * For regular extents it's btrfs_file_extent_item::disk_bytenr.
> +        * For holes it's EXTENT_MAP_HOLE and for inline extents it's
> +        * EXTENT_MAP_INLINE.
> +        */
> +       u64 disk_bytenr;
> +
>         /*
>          * The full on-disk extent length, matching
>          * btrfs_file_extent_item::disk_num_bytes.
>          */
>         u64 disk_num_bytes;
>
> +       /*
> +        * Offset inside the decompressed extent.
> +        *
> +        * For regular extents it's btrfs_file_extent_item::offset.
> +        * For holes and inline extents it's 0.
> +        */
> +       u64 offset;
> +
>         /*
>          * The decompressed size of the whole on-disk extent, matching
>          * btrfs_file_extent_item::ram_bytes.
> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
> index 2197cfe5443b..47bd4fe0a44b 100644
> --- a/fs/btrfs/file-item.c
> +++ b/fs/btrfs/file-item.c
> @@ -1294,12 +1294,17 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>                 em->len = btrfs_file_extent_end(path) - extent_start;
>                 em->orig_start = extent_start -
>                         btrfs_file_extent_offset(leaf, fi);
> -               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
>                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
>                 if (bytenr == 0) {
>                         em->block_start = EXTENT_MAP_HOLE;
> +                       em->disk_bytenr = EXTENT_MAP_HOLE;
> +                       em->disk_num_bytes = 0;
> +                       em->offset = 0;
>                         return;
>                 }
> +               em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
> +               em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
> +               em->offset = btrfs_file_extent_offset(leaf, fi);
>                 if (compress_type != BTRFS_COMPRESS_NONE) {
>                         extent_map_set_compression(em, compress_type);
>                         em->block_start = bytenr;
> @@ -1316,8 +1321,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
>                 ASSERT(extent_start == 0);
>
>                 em->block_start = EXTENT_MAP_INLINE;
> +               em->disk_bytenr = EXTENT_MAP_INLINE;
>                 em->start = 0;
>                 em->len = fs_info->sectorsize;
> +               em->offset = 0;
>                 /*
>                  * Initialize orig_start and block_len with the same values
>                  * as in inode.c:btrfs_get_extent().
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 63a13a4cace0..8931eeee199d 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2337,6 +2337,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
>                 hole_em->orig_start = offset;
>
>                 hole_em->block_start = EXTENT_MAP_HOLE;
> +               hole_em->disk_bytenr = EXTENT_MAP_HOLE;
>                 hole_em->block_len = 0;
>                 hole_em->disk_num_bytes = 0;
>                 hole_em->generation = trans->transid;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 2815b72f2d85..42fea12d509f 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -139,8 +139,9 @@ static noinline int run_delalloc_cow(struct btrfs_inode *inode,
>                                      bool pages_dirty);
>  static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>                                        u64 len, u64 orig_start, u64 block_start,
> -                                      u64 block_len, u64 orig_block_len,
> +                                      u64 block_len, u64 disk_num_bytes,
>                                        u64 ram_bytes, int compress_type,
> +                                      struct btrfs_file_extent *file_extent,
>                                        int type);
>
>  static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
> @@ -1152,6 +1153,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         struct btrfs_root *root = inode->root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
>         struct btrfs_ordered_extent *ordered;
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct btrfs_key ins;
>         struct page *locked_page = NULL;
>         struct extent_state *cached = NULL;
> @@ -1198,6 +1200,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>         lock_extent(io_tree, start, end, &cached);
>
>         /* Here we're doing allocation and writeback of the compressed pages */
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.ram_bytes = async_extent->ram_size;
> +       file_extent.num_bytes = async_extent->ram_size;
> +       file_extent.offset = 0;
> +       file_extent.compression = async_extent->compress_type;
> +
>         em = create_io_em(inode, start,
>                           async_extent->ram_size,       /* len */
>                           start,                        /* orig_start */
> @@ -1206,6 +1215,7 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
>                           ins.offset,                   /* orig_block_len */
>                           async_extent->ram_size,       /* ram_bytes */
>                           async_extent->compress_type,
> +                         &file_extent,
>                           BTRFS_ORDERED_COMPRESSED);
>         if (IS_ERR(em)) {
>                 ret = PTR_ERR(em);
> @@ -1395,6 +1405,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>
>         while (num_bytes > 0) {
>                 struct btrfs_ordered_extent *ordered;
> +               struct btrfs_file_extent file_extent = { 0 };
>
>                 cur_alloc_size = num_bytes;
>                 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
> @@ -1431,6 +1442,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                 extent_reserved = true;
>
>                 ram_size = ins.offset;
> +               file_extent.disk_bytenr = ins.objectid;
> +               file_extent.disk_num_bytes = ins.offset;
> +               file_extent.num_bytes = ins.offset;
> +               file_extent.ram_bytes = ins.offset;
> +               file_extent.offset = 0;
> +               file_extent.compression = BTRFS_COMPRESS_NONE;
>
>                 lock_extent(&inode->io_tree, start, start + ram_size - 1,
>                             &cached);
> @@ -1442,6 +1459,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
>                                   ins.offset, /* orig_block_len */
>                                   ram_size, /* ram_bytes */
>                                   BTRFS_COMPRESS_NONE, /* compress_type */
> +                                 &file_extent,
>                                   BTRFS_ORDERED_REGULAR /* type */);
>                 if (IS_ERR(em)) {
>                         unlock_extent(&inode->io_tree, start,
> @@ -1855,6 +1873,7 @@ struct can_nocow_file_extent_args {
>         u64 disk_bytenr;
>         u64 disk_num_bytes;
>         u64 extent_offset;
> +
>         /* Number of bytes that can be written to in NOCOW mode. */
>         u64 num_bytes;
>
> @@ -2182,6 +2201,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
>                                           nocow_args.num_bytes, /* block_len */
>                                           nocow_args.disk_num_bytes, /* orig_block_len */
>                                           ram_bytes, BTRFS_COMPRESS_NONE,
> +                                         &nocow_args.file_extent,
>                                           BTRFS_ORDERED_PREALLOC);
>                         if (IS_ERR(em)) {
>                                 unlock_extent(&inode->io_tree, cur_offset,
> @@ -4982,6 +5002,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
>                         hole_em->orig_start = cur_offset;
>
>                         hole_em->block_start = EXTENT_MAP_HOLE;
> +                       hole_em->disk_bytenr = EXTENT_MAP_HOLE;
>                         hole_em->block_len = 0;
>                         hole_em->disk_num_bytes = 0;
>                         hole_em->ram_bytes = hole_size;
> @@ -6842,6 +6863,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
>         }
>         em->start = EXTENT_MAP_HOLE;
>         em->orig_start = EXTENT_MAP_HOLE;
> +       em->disk_bytenr = EXTENT_MAP_HOLE;
>         em->len = (u64)-1;
>         em->block_len = (u64)-1;
>
> @@ -7007,7 +7029,8 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
>                                                   const u64 block_len,
>                                                   const u64 orig_block_len,
>                                                   const u64 ram_bytes,
> -                                                 const int type)
> +                                                 const int type,
> +                                                 struct btrfs_file_extent *file_extent)
>  {
>         struct extent_map *em = NULL;
>         struct btrfs_ordered_extent *ordered;
> @@ -7016,7 +7039,7 @@ static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
>                 em = create_io_em(inode, start, len, orig_start, block_start,
>                                   block_len, orig_block_len, ram_bytes,
>                                   BTRFS_COMPRESS_NONE, /* compress_type */
> -                                 type);
> +                                 file_extent, type);
>                 if (IS_ERR(em))
>                         goto out;
>         }
> @@ -7047,6 +7070,7 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
>  {
>         struct btrfs_root *root = inode->root;
>         struct btrfs_fs_info *fs_info = root->fs_info;
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct extent_map *em;
>         struct btrfs_key ins;
>         u64 alloc_hint;
> @@ -7065,9 +7089,16 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
>         if (ret)
>                 return ERR_PTR(ret);
>
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.num_bytes = ins.offset;
> +       file_extent.ram_bytes = ins.offset;
> +       file_extent.offset = 0;
> +       file_extent.compression = BTRFS_COMPRESS_NONE;
>         em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
>                                      ins.objectid, ins.offset, ins.offset,
> -                                    ins.offset, BTRFS_ORDERED_REGULAR);
> +                                    ins.offset, BTRFS_ORDERED_REGULAR,
> +                                    &file_extent);
>         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
>         if (IS_ERR(em))
>                 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
> @@ -7310,6 +7341,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>                                        u64 len, u64 orig_start, u64 block_start,
>                                        u64 block_len, u64 disk_num_bytes,
>                                        u64 ram_bytes, int compress_type,
> +                                      struct btrfs_file_extent *file_extent,
>                                        int type)
>  {
>         struct extent_map *em;
> @@ -7367,9 +7399,11 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
>         em->len = len;
>         em->block_len = block_len;
>         em->block_start = block_start;
> +       em->disk_bytenr = file_extent->disk_bytenr;
>         em->disk_num_bytes = disk_num_bytes;
>         em->ram_bytes = ram_bytes;
>         em->generation = -1;
> +       em->offset = file_extent->offset;
>         em->flags |= EXTENT_FLAG_PINNED;
>         if (type == BTRFS_ORDERED_COMPRESSED)
>                 extent_map_set_compression(em, compress_type);
> @@ -7393,6 +7427,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>  {
>         const bool nowait = (iomap_flags & IOMAP_NOWAIT);
>         struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
> +       struct btrfs_file_extent file_extent = { 0 };
>         struct extent_map *em = *map;
>         int type;
>         u64 block_start, orig_start, orig_block_len, ram_bytes;
> @@ -7423,7 +7458,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>                 block_start = em->block_start + (start - em->start);
>
>                 if (can_nocow_extent(inode, start, &len, &orig_start,
> -                                    &orig_block_len, &ram_bytes, NULL, false, false) == 1) {
> +                                    &orig_block_len, &ram_bytes,
> +                                    &file_extent, false, false) == 1) {
>                         bg = btrfs_inc_nocow_writers(fs_info, block_start);
>                         if (bg)
>                                 can_nocow = true;
> @@ -7451,7 +7487,8 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
>                 em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
>                                               orig_start, block_start,
>                                               len, orig_block_len,
> -                                             ram_bytes, type);
> +                                             ram_bytes, type,
> +                                             &file_extent);
>                 btrfs_dec_nocow_writers(bg);
>                 if (type == BTRFS_ORDERED_PREALLOC) {
>                         free_extent_map(em);
> @@ -9602,6 +9639,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
>                 em->orig_start = cur_offset;
>                 em->len = ins.offset;
>                 em->block_start = ins.objectid;
> +               em->disk_bytenr = ins.objectid;
> +               em->offset = 0;
>                 em->block_len = ins.offset;
>                 em->disk_num_bytes = ins.offset;
>                 em->ram_bytes = ins.offset;
> @@ -10168,6 +10207,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
>         struct extent_changeset *data_reserved = NULL;
>         struct extent_state *cached_state = NULL;
>         struct btrfs_ordered_extent *ordered;
> +       struct btrfs_file_extent file_extent = { 0 };
>         int compression;
>         size_t orig_count;
>         u64 start, end;
> @@ -10343,10 +10383,16 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
>                 goto out_delalloc_release;
>         extent_reserved = true;
>
> +       file_extent.disk_bytenr = ins.objectid;
> +       file_extent.disk_num_bytes = ins.offset;
> +       file_extent.num_bytes = num_bytes;
> +       file_extent.ram_bytes = ram_bytes;
> +       file_extent.offset = encoded->unencoded_offset;
> +       file_extent.compression = compression;
>         em = create_io_em(inode, start, num_bytes,
>                           start - encoded->unencoded_offset, ins.objectid,
>                           ins.offset, ins.offset, ram_bytes, compression,
> -                         BTRFS_ORDERED_COMPRESSED);
> +                         &file_extent, BTRFS_ORDERED_COMPRESSED);
>         if (IS_ERR(em)) {
>                 ret = PTR_ERR(em);
>                 goto out_free_reserved;
> --
> 2.45.0
>
>
diff mbox series

Patch

diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index 407ccec3e57e..242c5469f4ba 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -709,6 +709,10 @@  static struct extent_map *defrag_get_extent(struct btrfs_inode *inode,
 			em->start = start;
 			em->orig_start = start;
 			em->block_start = EXTENT_MAP_HOLE;
+			em->disk_bytenr = EXTENT_MAP_HOLE;
+			em->disk_num_bytes = 0;
+			em->ram_bytes = 0;
+			em->offset = 0;
 			em->len = key.offset - start;
 			break;
 		}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 4230dd0f34cc..4d4ac9fc43e2 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -232,6 +232,58 @@  static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
 	return next->block_start == prev->block_start;
 }
 
+/*
+ * Handle the ondisk data extents merge for @prev and @next.
+ *
+ * Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
+ * For now only uncompressed regular extent can be merged.
+ *
+ * @prev and @next will be both updated to point to the new merged range.
+ * Thus one of them should be removed by the caller.
+ */
+static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
+{
+	u64 new_disk_bytenr;
+	u64 new_disk_num_bytes;
+	u64 new_offset;
+
+	/* @prev and @next should not be compressed. */
+	ASSERT(!extent_map_is_compressed(prev));
+	ASSERT(!extent_map_is_compressed(next));
+
+	/*
+	 * There are several different cases that @prev and @next can be merged.
+	 *
+	 * 1) They are referring to the same data extent
+	 * 2) Their ondisk data extents are adjacent and @prev is the tail
+	 *    and @next is the head of their data extents
+	 * 3) One of @prev/@next is referrring to a larger merged data extent.
+	 *    (test_case_3 of extent maps tests).
+	 *
+	 * The calculation here always merge the data extents first, then update
+	 * @offset using the new data extents.
+	 *
+	 * For case 1), the merged data extent would be the same.
+	 * For case 2), we just merge the two data extents into one.
+	 * For case 3), we just got the larger data extent.
+	 */
+	new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
+	new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
+				 next->disk_bytenr + next->disk_num_bytes) -
+			     new_disk_bytenr;
+	new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
+
+	prev->disk_bytenr = new_disk_bytenr;
+	prev->disk_num_bytes = new_disk_num_bytes;
+	prev->ram_bytes = new_disk_num_bytes;
+	prev->offset = new_offset;
+
+	next->disk_bytenr = new_disk_bytenr;
+	next->disk_num_bytes = new_disk_num_bytes;
+	next->ram_bytes = new_disk_num_bytes;
+	next->offset = new_offset;
+}
+
 static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
 {
 	struct extent_map_tree *tree = &inode->extent_tree;
@@ -263,6 +315,9 @@  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
 			em->block_len += merge->block_len;
 			em->block_start = merge->block_start;
 			em->generation = max(em->generation, merge->generation);
+
+			if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
+				merge_ondisk_extents(merge, em);
 			em->flags |= EXTENT_FLAG_MERGED;
 
 			rb_erase_cached(&merge->rb_node, &tree->map);
@@ -278,6 +333,8 @@  static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
 	if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
 		em->len += merge->len;
 		em->block_len += merge->block_len;
+		if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
+			merge_ondisk_extents(em, merge);
 		rb_erase_cached(&merge->rb_node, &tree->map);
 		RB_CLEAR_NODE(&merge->rb_node);
 		em->generation = max(em->generation, merge->generation);
@@ -565,6 +622,7 @@  static noinline int merge_extent_mapping(struct btrfs_inode *inode,
 	    !extent_map_is_compressed(em)) {
 		em->block_start += start_diff;
 		em->block_len = em->len;
+		em->offset += start_diff;
 	}
 	return add_extent_mapping(inode, em, 0);
 }
@@ -783,14 +841,18 @@  void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
 					split->block_len = em->block_len;
 				else
 					split->block_len = split->len;
+				split->disk_bytenr = em->disk_bytenr;
 				split->disk_num_bytes = max(split->block_len,
 							    em->disk_num_bytes);
+				split->offset = em->offset;
 				split->ram_bytes = em->ram_bytes;
 			} else {
 				split->orig_start = split->start;
 				split->block_len = 0;
 				split->block_start = em->block_start;
+				split->disk_bytenr = em->disk_bytenr;
 				split->disk_num_bytes = 0;
+				split->offset = 0;
 				split->ram_bytes = split->len;
 			}
 
@@ -811,13 +873,14 @@  void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
 			split->start = end;
 			split->len = em_end - end;
 			split->block_start = em->block_start;
+			split->disk_bytenr = em->disk_bytenr;
 			split->flags = flags;
 			split->generation = gen;
 
 			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
 				split->disk_num_bytes = max(em->block_len,
 							    em->disk_num_bytes);
-
+				split->offset = em->offset + end - em->start;
 				split->ram_bytes = em->ram_bytes;
 				if (compressed) {
 					split->block_len = em->block_len;
@@ -830,10 +893,11 @@  void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
 					split->orig_start = em->orig_start;
 				}
 			} else {
+				split->disk_num_bytes = 0;
+				split->offset = 0;
 				split->ram_bytes = split->len;
 				split->orig_start = split->start;
 				split->block_len = 0;
-				split->disk_num_bytes = 0;
 			}
 
 			if (extent_map_in_tree(em)) {
@@ -987,6 +1051,9 @@  int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
 	/* First, replace the em with a new extent_map starting from * em->start */
 	split_pre->start = em->start;
 	split_pre->len = pre;
+	split_pre->disk_bytenr = new_logical;
+	split_pre->disk_num_bytes = split_pre->len;
+	split_pre->offset = 0;
 	split_pre->orig_start = split_pre->start;
 	split_pre->block_start = new_logical;
 	split_pre->block_len = split_pre->len;
@@ -1005,10 +1072,12 @@  int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
 	/* Insert the middle extent_map. */
 	split_mid->start = em->start + pre;
 	split_mid->len = em->len - pre;
+	split_mid->disk_bytenr = em->block_start + pre;
+	split_mid->disk_num_bytes = split_mid->len;
+	split_mid->offset = 0;
 	split_mid->orig_start = split_mid->start;
 	split_mid->block_start = em->block_start + pre;
 	split_mid->block_len = split_mid->len;
-	split_mid->disk_num_bytes = split_mid->block_len;
 	split_mid->ram_bytes = split_mid->len;
 	split_mid->flags = flags;
 	split_mid->generation = em->generation;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 6ea0287b0d61..cc9c8092b704 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -70,12 +70,29 @@  struct extent_map {
 	 */
 	u64 orig_start;
 
+	/*
+	 * The bytenr for of the full on-disk extent.
+	 *
+	 * For regular extents it's btrfs_file_extent_item::disk_bytenr.
+	 * For holes it's EXTENT_MAP_HOLE and for inline extents it's
+	 * EXTENT_MAP_INLINE.
+	 */
+	u64 disk_bytenr;
+
 	/*
 	 * The full on-disk extent length, matching
 	 * btrfs_file_extent_item::disk_num_bytes.
 	 */
 	u64 disk_num_bytes;
 
+	/*
+	 * Offset inside the decompressed extent.
+	 *
+	 * For regular extents it's btrfs_file_extent_item::offset.
+	 * For holes and inline extents it's 0.
+	 */
+	u64 offset;
+
 	/*
 	 * The decompressed size of the whole on-disk extent, matching
 	 * btrfs_file_extent_item::ram_bytes.
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 2197cfe5443b..47bd4fe0a44b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -1294,12 +1294,17 @@  void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 		em->len = btrfs_file_extent_end(path) - extent_start;
 		em->orig_start = extent_start -
 			btrfs_file_extent_offset(leaf, fi);
-		em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
 		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 		if (bytenr == 0) {
 			em->block_start = EXTENT_MAP_HOLE;
+			em->disk_bytenr = EXTENT_MAP_HOLE;
+			em->disk_num_bytes = 0;
+			em->offset = 0;
 			return;
 		}
+		em->disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+		em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+		em->offset = btrfs_file_extent_offset(leaf, fi);
 		if (compress_type != BTRFS_COMPRESS_NONE) {
 			extent_map_set_compression(em, compress_type);
 			em->block_start = bytenr;
@@ -1316,8 +1321,10 @@  void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 		ASSERT(extent_start == 0);
 
 		em->block_start = EXTENT_MAP_INLINE;
+		em->disk_bytenr = EXTENT_MAP_INLINE;
 		em->start = 0;
 		em->len = fs_info->sectorsize;
+		em->offset = 0;
 		/*
 		 * Initialize orig_start and block_len with the same values
 		 * as in inode.c:btrfs_get_extent().
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 63a13a4cace0..8931eeee199d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2337,6 +2337,7 @@  static int fill_holes(struct btrfs_trans_handle *trans,
 		hole_em->orig_start = offset;
 
 		hole_em->block_start = EXTENT_MAP_HOLE;
+		hole_em->disk_bytenr = EXTENT_MAP_HOLE;
 		hole_em->block_len = 0;
 		hole_em->disk_num_bytes = 0;
 		hole_em->generation = trans->transid;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2815b72f2d85..42fea12d509f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -139,8 +139,9 @@  static noinline int run_delalloc_cow(struct btrfs_inode *inode,
 				     bool pages_dirty);
 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 len, u64 orig_start, u64 block_start,
-				       u64 block_len, u64 orig_block_len,
+				       u64 block_len, u64 disk_num_bytes,
 				       u64 ram_bytes, int compress_type,
+				       struct btrfs_file_extent *file_extent,
 				       int type);
 
 static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
@@ -1152,6 +1153,7 @@  static void submit_one_async_extent(struct async_chunk *async_chunk,
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_ordered_extent *ordered;
+	struct btrfs_file_extent file_extent = { 0 };
 	struct btrfs_key ins;
 	struct page *locked_page = NULL;
 	struct extent_state *cached = NULL;
@@ -1198,6 +1200,13 @@  static void submit_one_async_extent(struct async_chunk *async_chunk,
 	lock_extent(io_tree, start, end, &cached);
 
 	/* Here we're doing allocation and writeback of the compressed pages */
+	file_extent.disk_bytenr = ins.objectid;
+	file_extent.disk_num_bytes = ins.offset;
+	file_extent.ram_bytes = async_extent->ram_size;
+	file_extent.num_bytes = async_extent->ram_size;
+	file_extent.offset = 0;
+	file_extent.compression = async_extent->compress_type;
+
 	em = create_io_em(inode, start,
 			  async_extent->ram_size,	/* len */
 			  start,			/* orig_start */
@@ -1206,6 +1215,7 @@  static void submit_one_async_extent(struct async_chunk *async_chunk,
 			  ins.offset,			/* orig_block_len */
 			  async_extent->ram_size,	/* ram_bytes */
 			  async_extent->compress_type,
+			  &file_extent,
 			  BTRFS_ORDERED_COMPRESSED);
 	if (IS_ERR(em)) {
 		ret = PTR_ERR(em);
@@ -1395,6 +1405,7 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 
 	while (num_bytes > 0) {
 		struct btrfs_ordered_extent *ordered;
+		struct btrfs_file_extent file_extent = { 0 };
 
 		cur_alloc_size = num_bytes;
 		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
@@ -1431,6 +1442,12 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 		extent_reserved = true;
 
 		ram_size = ins.offset;
+		file_extent.disk_bytenr = ins.objectid;
+		file_extent.disk_num_bytes = ins.offset;
+		file_extent.num_bytes = ins.offset;
+		file_extent.ram_bytes = ins.offset;
+		file_extent.offset = 0;
+		file_extent.compression = BTRFS_COMPRESS_NONE;
 
 		lock_extent(&inode->io_tree, start, start + ram_size - 1,
 			    &cached);
@@ -1442,6 +1459,7 @@  static noinline int cow_file_range(struct btrfs_inode *inode,
 				  ins.offset, /* orig_block_len */
 				  ram_size, /* ram_bytes */
 				  BTRFS_COMPRESS_NONE, /* compress_type */
+				  &file_extent,
 				  BTRFS_ORDERED_REGULAR /* type */);
 		if (IS_ERR(em)) {
 			unlock_extent(&inode->io_tree, start,
@@ -1855,6 +1873,7 @@  struct can_nocow_file_extent_args {
 	u64 disk_bytenr;
 	u64 disk_num_bytes;
 	u64 extent_offset;
+
 	/* Number of bytes that can be written to in NOCOW mode. */
 	u64 num_bytes;
 
@@ -2182,6 +2201,7 @@  static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
 					  nocow_args.num_bytes, /* block_len */
 					  nocow_args.disk_num_bytes, /* orig_block_len */
 					  ram_bytes, BTRFS_COMPRESS_NONE,
+					  &nocow_args.file_extent,
 					  BTRFS_ORDERED_PREALLOC);
 			if (IS_ERR(em)) {
 				unlock_extent(&inode->io_tree, cur_offset,
@@ -4982,6 +5002,7 @@  int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 			hole_em->orig_start = cur_offset;
 
 			hole_em->block_start = EXTENT_MAP_HOLE;
+			hole_em->disk_bytenr = EXTENT_MAP_HOLE;
 			hole_em->block_len = 0;
 			hole_em->disk_num_bytes = 0;
 			hole_em->ram_bytes = hole_size;
@@ -6842,6 +6863,7 @@  struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 	}
 	em->start = EXTENT_MAP_HOLE;
 	em->orig_start = EXTENT_MAP_HOLE;
+	em->disk_bytenr = EXTENT_MAP_HOLE;
 	em->len = (u64)-1;
 	em->block_len = (u64)-1;
 
@@ -7007,7 +7029,8 @@  static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
 						  const u64 block_len,
 						  const u64 orig_block_len,
 						  const u64 ram_bytes,
-						  const int type)
+						  const int type,
+						  struct btrfs_file_extent *file_extent)
 {
 	struct extent_map *em = NULL;
 	struct btrfs_ordered_extent *ordered;
@@ -7016,7 +7039,7 @@  static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
 		em = create_io_em(inode, start, len, orig_start, block_start,
 				  block_len, orig_block_len, ram_bytes,
 				  BTRFS_COMPRESS_NONE, /* compress_type */
-				  type);
+				  file_extent, type);
 		if (IS_ERR(em))
 			goto out;
 	}
@@ -7047,6 +7070,7 @@  static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_file_extent file_extent = { 0 };
 	struct extent_map *em;
 	struct btrfs_key ins;
 	u64 alloc_hint;
@@ -7065,9 +7089,16 @@  static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
 	if (ret)
 		return ERR_PTR(ret);
 
+	file_extent.disk_bytenr = ins.objectid;
+	file_extent.disk_num_bytes = ins.offset;
+	file_extent.num_bytes = ins.offset;
+	file_extent.ram_bytes = ins.offset;
+	file_extent.offset = 0;
+	file_extent.compression = BTRFS_COMPRESS_NONE;
 	em = btrfs_create_dio_extent(inode, dio_data, start, ins.offset, start,
 				     ins.objectid, ins.offset, ins.offset,
-				     ins.offset, BTRFS_ORDERED_REGULAR);
+				     ins.offset, BTRFS_ORDERED_REGULAR,
+				     &file_extent);
 	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 	if (IS_ERR(em))
 		btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
@@ -7310,6 +7341,7 @@  static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 				       u64 len, u64 orig_start, u64 block_start,
 				       u64 block_len, u64 disk_num_bytes,
 				       u64 ram_bytes, int compress_type,
+				       struct btrfs_file_extent *file_extent,
 				       int type)
 {
 	struct extent_map *em;
@@ -7367,9 +7399,11 @@  static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 	em->len = len;
 	em->block_len = block_len;
 	em->block_start = block_start;
+	em->disk_bytenr = file_extent->disk_bytenr;
 	em->disk_num_bytes = disk_num_bytes;
 	em->ram_bytes = ram_bytes;
 	em->generation = -1;
+	em->offset = file_extent->offset;
 	em->flags |= EXTENT_FLAG_PINNED;
 	if (type == BTRFS_ORDERED_COMPRESSED)
 		extent_map_set_compression(em, compress_type);
@@ -7393,6 +7427,7 @@  static int btrfs_get_blocks_direct_write(struct extent_map **map,
 {
 	const bool nowait = (iomap_flags & IOMAP_NOWAIT);
 	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
+	struct btrfs_file_extent file_extent = { 0 };
 	struct extent_map *em = *map;
 	int type;
 	u64 block_start, orig_start, orig_block_len, ram_bytes;
@@ -7423,7 +7458,8 @@  static int btrfs_get_blocks_direct_write(struct extent_map **map,
 		block_start = em->block_start + (start - em->start);
 
 		if (can_nocow_extent(inode, start, &len, &orig_start,
-				     &orig_block_len, &ram_bytes, NULL, false, false) == 1) {
+				     &orig_block_len, &ram_bytes,
+				     &file_extent, false, false) == 1) {
 			bg = btrfs_inc_nocow_writers(fs_info, block_start);
 			if (bg)
 				can_nocow = true;
@@ -7451,7 +7487,8 @@  static int btrfs_get_blocks_direct_write(struct extent_map **map,
 		em2 = btrfs_create_dio_extent(BTRFS_I(inode), dio_data, start, len,
 					      orig_start, block_start,
 					      len, orig_block_len,
-					      ram_bytes, type);
+					      ram_bytes, type,
+					      &file_extent);
 		btrfs_dec_nocow_writers(bg);
 		if (type == BTRFS_ORDERED_PREALLOC) {
 			free_extent_map(em);
@@ -9602,6 +9639,8 @@  static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 		em->orig_start = cur_offset;
 		em->len = ins.offset;
 		em->block_start = ins.objectid;
+		em->disk_bytenr = ins.objectid;
+		em->offset = 0;
 		em->block_len = ins.offset;
 		em->disk_num_bytes = ins.offset;
 		em->ram_bytes = ins.offset;
@@ -10168,6 +10207,7 @@  ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 	struct extent_changeset *data_reserved = NULL;
 	struct extent_state *cached_state = NULL;
 	struct btrfs_ordered_extent *ordered;
+	struct btrfs_file_extent file_extent = { 0 };
 	int compression;
 	size_t orig_count;
 	u64 start, end;
@@ -10343,10 +10383,16 @@  ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 		goto out_delalloc_release;
 	extent_reserved = true;
 
+	file_extent.disk_bytenr = ins.objectid;
+	file_extent.disk_num_bytes = ins.offset;
+	file_extent.num_bytes = num_bytes;
+	file_extent.ram_bytes = ram_bytes;
+	file_extent.offset = encoded->unencoded_offset;
+	file_extent.compression = compression;
 	em = create_io_em(inode, start, num_bytes,
 			  start - encoded->unencoded_offset, ins.objectid,
 			  ins.offset, ins.offset, ram_bytes, compression,
-			  BTRFS_ORDERED_COMPRESSED);
+			  &file_extent, BTRFS_ORDERED_COMPRESSED);
 	if (IS_ERR(em)) {
 		ret = PTR_ERR(em);
 		goto out_free_reserved;