diff mbox series

[RFC,ONLY,4/8] btrfs: add boilerplate code to insert raid extent

Message ID 35ea1d22a55d8dd30bc9f9dfcd4a48890bf7feaf.1652711187.git.johannes.thumshirn@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: introduce raid-stripe-tree | expand

Commit Message

Johannes Thumshirn May 16, 2022, 2:31 p.m. UTC
Add boilerplate code to insert raid extents into the raid-stripe-tree on
each write to a RAID1 block-group.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/Makefile           |  2 +-
 fs/btrfs/raid-stripe-tree.c | 72 +++++++++++++++++++++++++++++++++++++
 fs/btrfs/raid-stripe-tree.h | 28 +++++++++++++++
 fs/btrfs/volumes.c          | 21 +++++++++++
 fs/btrfs/volumes.h          |  3 ++
 5 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 fs/btrfs/raid-stripe-tree.c
 create mode 100644 fs/btrfs/raid-stripe-tree.h

Comments

Qu Wenruo May 17, 2022, 7:53 a.m. UTC | #1
On 2022/5/16 22:31, Johannes Thumshirn wrote:
> Add boilerplate code to insert raid extents into the raid-stripe-tree on
> each write to a RAID1 block-group.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   fs/btrfs/Makefile           |  2 +-
>   fs/btrfs/raid-stripe-tree.c | 72 +++++++++++++++++++++++++++++++++++++
>   fs/btrfs/raid-stripe-tree.h | 28 +++++++++++++++
>   fs/btrfs/volumes.c          | 21 +++++++++++
>   fs/btrfs/volumes.h          |  3 ++
>   5 files changed, 125 insertions(+), 1 deletion(-)
>   create mode 100644 fs/btrfs/raid-stripe-tree.c
>   create mode 100644 fs/btrfs/raid-stripe-tree.h
>
> diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
> index 4188ba3fd8c3..6b9a00ad532a 100644
> --- a/fs/btrfs/Makefile
> +++ b/fs/btrfs/Makefile
> @@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
>   	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
>   	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
>   	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
> -	   subpage.o tree-mod-log.o
> +	   subpage.o tree-mod-log.o raid-stripe-tree.o
>
>   btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
>   btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
> diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
> new file mode 100644
> index 000000000000..426066bd7c0d
> --- /dev/null
> +++ b/fs/btrfs/raid-stripe-tree.c
> @@ -0,0 +1,72 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "ctree.h"
> +#include "transaction.h"
> +#include "disk-io.h"
> +#include "raid-stripe-tree.h"
> +#include "volumes.h"
> +
> +static void btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
> +				     struct btrfs_io_context *bioc)
> +{
> +	struct btrfs_fs_info *fs_info = bioc->fs_info;
> +	struct btrfs_key stripe_key;
> +	struct btrfs_root *stripe_root = fs_info->stripe_root;
> +	struct btrfs_dp_stripe *raid_stripe;
> +	struct btrfs_stripe_extent *stripe_extent;
> +	size_t item_size;
> +	int ret;
> +	int i;
> +
> +	item_size = sizeof(struct btrfs_dp_stripe) - sizeof(struct btrfs_stripe_extent) +
> +		bioc->num_stripes * sizeof(struct btrfs_stripe_extent);
> +
> +	raid_stripe = kzalloc(item_size, GFP_NOFS);
> +	if (!raid_stripe) {
> +		btrfs_abort_transaction(trans, -ENOMEM);
> +		return;
> +	}
> +
> +	stripe_extent = &raid_stripe->extents;
> +	for (i = 0; i  < bioc->num_stripes; i++) {
> +		u64 devid = bioc->stripes[i].dev->devid;
> +		u64 physical = bioc->stripes[i].physical;
> +
> +		btrfs_set_stack_stripe_extent_devid(stripe_extent, devid);
> +		btrfs_set_stack_stripe_extent_offset(stripe_extent, physical);
> +		stripe_extent++;
> +	}
> +
> +	stripe_key.objectid = bioc->logical;
> +	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
> +	stripe_key.offset = bioc->length;
> +
> +	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, raid_stripe,
> +				item_size);
> +	if (ret) {
> +		kfree(raid_stripe);
> +		btrfs_abort_transaction(trans, ret);
> +		return;
> +	}
> +
> +	kfree(raid_stripe);
> +}
> +
> +void btrfs_raid_stripe_tree_fn(struct work_struct *work)
> +{
> +	struct btrfs_io_context *bioc;
> +	struct btrfs_fs_info *fs_info;
> +	struct btrfs_root *root;
> +	struct btrfs_trans_handle *trans = NULL;
> +
> +	bioc = container_of(work, struct btrfs_io_context, stripe_update_work);
> +	fs_info = bioc->fs_info;
> +	root = fs_info->stripe_root;
> +
> +	trans = btrfs_join_transaction(root);
> +
> +	btrfs_insert_raid_extent(trans, bioc);
> +	btrfs_end_transaction(trans);
> +
> +	btrfs_put_bioc(bioc);
> +}
> diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
> new file mode 100644
> index 000000000000..320a110ecc66
> --- /dev/null
> +++ b/fs/btrfs/raid-stripe-tree.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef BTRFS_RAID_STRIPE_TREE_H
> +#define BTRFS_RAID_STRIPE_TREE_H
> +
> +#include "volumes.h"
> +
> +void btrfs_raid_stripe_tree_fn(struct work_struct *work);
> +
> +static inline bool btrfs_need_stripe_tree_update(struct btrfs_io_context *bioc)
> +{
> +	u64 type = bioc->map_type & BTRFS_BLOCK_GROUP_TYPE_MASK;
> +	u64 profile = bioc->map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
> +
> +	if (!bioc->fs_info->stripe_root)
> +		return false;
> +
> +	// for now
> +	if (type != BTRFS_BLOCK_GROUP_DATA)
> +		return false;

OK, for now it's indeed excluding metadata/sys chunks from the stripe tree.

That's fine for now.

But this really brings up the bootstrap problem, so I'm afraid that we
may never be able to support stripe-tree mapped metadata/sys chunks.


This also brings up a new problem: if we plan to make the stripe tree
work for metadata/sys chunks then, bootstrap problem aside, we also need
to determine whether the stripe tree is something global or per-chunk.

a) Global switch for stripe tree

If global, then every data chunk needs to be stripe-mapped, or we have to
maintain a complex list of chunk types supported by the stripe tree.

In fact, btrfs_need_stripe_tree_update() is currently already doing
that.

Without a proper on-disk indicator, we can never really provide stable
stripe-tree support for additional profiles.

b) Per-chunk type stripe tree

Then we need an extra type/flag for chunks/block groups to indicate that
any read/write into the chunk needs a stripe tree update.

This allows us to support different chunk types with the stripe tree, but
needs a more complex on-disk change than just a simple global flag.

Thanks,
Qu

> +
> +	if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK)
> +		return true;
> +
> +	return false;
> +}
> +
> +#endif
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 3fd17e87815a..36acef2ae5d8 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -33,6 +33,7 @@
>   #include "block-group.h"
>   #include "discard.h"
>   #include "zoned.h"
> +#include "raid-stripe-tree.h"
>
>   #define BTRFS_BLOCK_GROUP_STRIPE_MASK	(BTRFS_BLOCK_GROUP_RAID0 | \
>   					 BTRFS_BLOCK_GROUP_RAID10 | \
> @@ -5917,6 +5918,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
>   	bioc->fs_info = fs_info;
>   	bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes);
>   	bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes);
> +	INIT_WORK(&bioc->stripe_update_work, btrfs_raid_stripe_tree_fn);
>
>   	return bioc;
>   }
> @@ -6677,6 +6679,17 @@ static void btrfs_end_bio(struct bio *bio)
>   		}
>   	}
>
> +	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> +		int i;
> +
> +		for (i = 0; i < bioc->num_stripes; i++) {
> +			if (bioc->stripes[i].dev->bdev != bio->bi_bdev)
> +				continue;
> +			bioc->stripes[i].physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
> +		}
> +	}
> +
> +
>   	if (bio == bioc->orig_bio)
>   		is_orig_bio = 1;
>
> @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio)
>   			 * go over the max number of errors
>   			 */
>   			bio->bi_status = BLK_STS_OK;
> +
> +			if (btrfs_op(bio) == BTRFS_MAP_WRITE &&
> +			    btrfs_need_stripe_tree_update(bioc)) {
> +				btrfs_get_bioc(bioc);
> +				schedule_work(&bioc->stripe_update_work);
> +			}
>   		}
>
>   		btrfs_end_bioc(bioc, bio);
> @@ -6788,6 +6807,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   	bioc->orig_bio = first_bio;
>   	bioc->private = first_bio->bi_private;
>   	bioc->end_io = first_bio->bi_end_io;
> +	bioc->logical = logical;
> +	bioc->length = length;
>   	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
>
>   	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 894d289a3b50..4b4235b4432a 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -68,6 +68,9 @@ struct btrfs_io_context {
>   	int mirror_num;
>   	int num_tgtdevs;
>   	int *tgtdev_map;
> +	u64 logical;
> +	u64 length;
> +	struct work_struct stripe_update_work;
>   	/*
>   	 * logical block numbers for the start of each stripe
>   	 * The last one or two are p/q.  These are sorted,
Qu Wenruo May 17, 2022, 8 a.m. UTC | #2
On 2022/5/16 22:31, Johannes Thumshirn wrote:
> Add boilerplate code to insert raid extents into the raid-stripe-tree on
> each write to a RAID1 block-group.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   fs/btrfs/Makefile           |  2 +-
>   fs/btrfs/raid-stripe-tree.c | 72 +++++++++++++++++++++++++++++++++++++
>   fs/btrfs/raid-stripe-tree.h | 28 +++++++++++++++
>   fs/btrfs/volumes.c          | 21 +++++++++++
>   fs/btrfs/volumes.h          |  3 ++
>   5 files changed, 125 insertions(+), 1 deletion(-)
>   create mode 100644 fs/btrfs/raid-stripe-tree.c
>   create mode 100644 fs/btrfs/raid-stripe-tree.h
>
> diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
> index 4188ba3fd8c3..6b9a00ad532a 100644
> --- a/fs/btrfs/Makefile
> +++ b/fs/btrfs/Makefile
> @@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
>   	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
>   	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
>   	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
> -	   subpage.o tree-mod-log.o
> +	   subpage.o tree-mod-log.o raid-stripe-tree.o
>
>   btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
>   btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
> diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
> new file mode 100644
> index 000000000000..426066bd7c0d
> --- /dev/null
> +++ b/fs/btrfs/raid-stripe-tree.c
> @@ -0,0 +1,72 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "ctree.h"
> +#include "transaction.h"
> +#include "disk-io.h"
> +#include "raid-stripe-tree.h"
> +#include "volumes.h"
> +
> +static void btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
> +				     struct btrfs_io_context *bioc)
> +{
> +	struct btrfs_fs_info *fs_info = bioc->fs_info;
> +	struct btrfs_key stripe_key;
> +	struct btrfs_root *stripe_root = fs_info->stripe_root;
> +	struct btrfs_dp_stripe *raid_stripe;
> +	struct btrfs_stripe_extent *stripe_extent;
> +	size_t item_size;
> +	int ret;
> +	int i;
> +
> +	item_size = sizeof(struct btrfs_dp_stripe) - sizeof(struct btrfs_stripe_extent) +
> +		bioc->num_stripes * sizeof(struct btrfs_stripe_extent);
> +
> +	raid_stripe = kzalloc(item_size, GFP_NOFS);
> +	if (!raid_stripe) {
> +		btrfs_abort_transaction(trans, -ENOMEM);
> +		return;
> +	}
> +
> +	stripe_extent = &raid_stripe->extents;
> +	for (i = 0; i  < bioc->num_stripes; i++) {
> +		u64 devid = bioc->stripes[i].dev->devid;
> +		u64 physical = bioc->stripes[i].physical;
> +
> +		btrfs_set_stack_stripe_extent_devid(stripe_extent, devid);
> +		btrfs_set_stack_stripe_extent_offset(stripe_extent, physical);
> +		stripe_extent++;
> +	}
> +
> +	stripe_key.objectid = bioc->logical;
> +	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
> +	stripe_key.offset = bioc->length;
> +
> +	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, raid_stripe,
> +				item_size);
> +	if (ret) {
> +		kfree(raid_stripe);
> +		btrfs_abort_transaction(trans, ret);
> +		return;
> +	}
> +
> +	kfree(raid_stripe);
> +}
> +
> +void btrfs_raid_stripe_tree_fn(struct work_struct *work)
> +{
> +	struct btrfs_io_context *bioc;
> +	struct btrfs_fs_info *fs_info;
> +	struct btrfs_root *root;
> +	struct btrfs_trans_handle *trans = NULL;
> +
> +	bioc = container_of(work, struct btrfs_io_context, stripe_update_work);
> +	fs_info = bioc->fs_info;
> +	root = fs_info->stripe_root;
> +
> +	trans = btrfs_join_transaction(root);
> +
> +	btrfs_insert_raid_extent(trans, bioc);
> +	btrfs_end_transaction(trans);
> +
> +	btrfs_put_bioc(bioc);
> +}
> diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
> new file mode 100644
> index 000000000000..320a110ecc66
> --- /dev/null
> +++ b/fs/btrfs/raid-stripe-tree.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef BTRFS_RAID_STRIPE_TREE_H
> +#define BTRFS_RAID_STRIPE_TREE_H
> +
> +#include "volumes.h"
> +
> +void btrfs_raid_stripe_tree_fn(struct work_struct *work);
> +
> +static inline bool btrfs_need_stripe_tree_update(struct btrfs_io_context *bioc)
> +{
> +	u64 type = bioc->map_type & BTRFS_BLOCK_GROUP_TYPE_MASK;
> +	u64 profile = bioc->map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
> +
> +	if (!bioc->fs_info->stripe_root)
> +		return false;
> +
> +	// for now
> +	if (type != BTRFS_BLOCK_GROUP_DATA)
> +		return false;
> +
> +	if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK)
> +		return true;
> +
> +	return false;
> +}
> +
> +#endif
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 3fd17e87815a..36acef2ae5d8 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -33,6 +33,7 @@
>   #include "block-group.h"
>   #include "discard.h"
>   #include "zoned.h"
> +#include "raid-stripe-tree.h"
>
>   #define BTRFS_BLOCK_GROUP_STRIPE_MASK	(BTRFS_BLOCK_GROUP_RAID0 | \
>   					 BTRFS_BLOCK_GROUP_RAID10 | \
> @@ -5917,6 +5918,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
>   	bioc->fs_info = fs_info;
>   	bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes);
>   	bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes);
> +	INIT_WORK(&bioc->stripe_update_work, btrfs_raid_stripe_tree_fn);
>
>   	return bioc;
>   }
> @@ -6677,6 +6679,17 @@ static void btrfs_end_bio(struct bio *bio)
>   		}
>   	}
>
> +	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> +		int i;
> +
> +		for (i = 0; i < bioc->num_stripes; i++) {
> +			if (bioc->stripes[i].dev->bdev != bio->bi_bdev)
> +				continue;
> +			bioc->stripes[i].physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
> +		}
> +	}
> +
> +
>   	if (bio == bioc->orig_bio)
>   		is_orig_bio = 1;
>
> @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio)
>   			 * go over the max number of errors
>   			 */
>   			bio->bi_status = BLK_STS_OK;
> +
> +			if (btrfs_op(bio) == BTRFS_MAP_WRITE &&
> +			    btrfs_need_stripe_tree_update(bioc)) {
> +				btrfs_get_bioc(bioc);
> +				schedule_work(&bioc->stripe_update_work);

Considering the stripe tree should be a 1:1 map for file extents, can't
we do it in btrfs_finish_ordered_io()?

Thanks,
Qu

> +			}
>   		}
>
>   		btrfs_end_bioc(bioc, bio);
> @@ -6788,6 +6807,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>   	bioc->orig_bio = first_bio;
>   	bioc->private = first_bio->bi_private;
>   	bioc->end_io = first_bio->bi_end_io;
> +	bioc->logical = logical;
> +	bioc->length = length;
>   	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
>
>   	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 894d289a3b50..4b4235b4432a 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -68,6 +68,9 @@ struct btrfs_io_context {
>   	int mirror_num;
>   	int num_tgtdevs;
>   	int *tgtdev_map;
> +	u64 logical;
> +	u64 length;
> +	struct work_struct stripe_update_work;
>   	/*
>   	 * logical block numbers for the start of each stripe
>   	 * The last one or two are p/q.  These are sorted,
Johannes Thumshirn May 17, 2022, 8:05 a.m. UTC | #3
On 17/05/2022 10:01, Qu Wenruo wrote:
>> @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio)
>>   			 * go over the max number of errors
>>   			 */
>>   			bio->bi_status = BLK_STS_OK;
>> +
>> +			if (btrfs_op(bio) == BTRFS_MAP_WRITE &&
>> +			    btrfs_need_stripe_tree_update(bioc)) {
>> +				btrfs_get_bioc(bioc);
>> +				schedule_work(&bioc->stripe_update_work);
> Considering the stripe tree should be a 1:1 map for file extents, can't
> we do it in btrfs_finish_ordered_io()?

Unfortunately not at the moment. I need the stripes[] array from
btrfs_io_context to record the per-disk physical locations. Another
possibility would be to lift this array into btrfs_ordered_extent,
then it can be done in btrfs_finish_ordered_io().
Qu Wenruo May 17, 2022, 8:09 a.m. UTC | #4
On 2022/5/17 16:05, Johannes Thumshirn wrote:
> On 17/05/2022 10:01, Qu Wenruo wrote:
>>> @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio)
>>>    			 * go over the max number of errors
>>>    			 */
>>>    			bio->bi_status = BLK_STS_OK;
>>> +
>>> +			if (btrfs_op(bio) == BTRFS_MAP_WRITE &&
>>> +			    btrfs_need_stripe_tree_update(bioc)) {
>>> +				btrfs_get_bioc(bioc);
>>> +				schedule_work(&bioc->stripe_update_work);
>> Considering the stripe tree should be a 1:1 map for file extents, can't
>> we do it in btrfs_finish_ordered_io()?
>
> Unfortunately not at the moment. I need the stripes[] array from
> btrfs_io_context to record the per-disk physical locations. Another
> possibility would be to lift this array into btrfs_ordered_extent,
> then it can be done in btrfs_finish_ordered_io().

At least to me, lifting it into btrfs_ordered_extent seems more reasonable.

One problem is what happens if we write the stripe to the stripe tree and
a transaction is committed, but a power loss happens before
btrfs_finish_ordered_io().

Then we would have an orphan stripe item in the stripe tree, I guess.

Then we may later hit EEXIST when doing other stripe tree operations.

Thanks,
Qu
diff mbox series

Patch

diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 4188ba3fd8c3..6b9a00ad532a 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -30,7 +30,7 @@  btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
 	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-	   subpage.o tree-mod-log.o
+	   subpage.o tree-mod-log.o raid-stripe-tree.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c
new file mode 100644
index 000000000000..426066bd7c0d
--- /dev/null
+++ b/fs/btrfs/raid-stripe-tree.c
@@ -0,0 +1,72 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "raid-stripe-tree.h"
+#include "volumes.h"
+
+static void btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_io_context *bioc)
+{
+	struct btrfs_fs_info *fs_info = bioc->fs_info;
+	struct btrfs_key stripe_key;
+	struct btrfs_root *stripe_root = fs_info->stripe_root;
+	struct btrfs_dp_stripe *raid_stripe;
+	struct btrfs_stripe_extent *stripe_extent;
+	size_t item_size;
+	int ret;
+	int i;
+
+	item_size = sizeof(struct btrfs_dp_stripe) - sizeof(struct btrfs_stripe_extent) +
+		bioc->num_stripes * sizeof(struct btrfs_stripe_extent);
+
+	raid_stripe = kzalloc(item_size, GFP_NOFS);
+	if (!raid_stripe) {
+		btrfs_abort_transaction(trans, -ENOMEM);
+		return;
+	}
+
+	stripe_extent = &raid_stripe->extents;
+	for (i = 0; i  < bioc->num_stripes; i++) {
+		u64 devid = bioc->stripes[i].dev->devid;
+		u64 physical = bioc->stripes[i].physical;
+
+		btrfs_set_stack_stripe_extent_devid(stripe_extent, devid);
+		btrfs_set_stack_stripe_extent_offset(stripe_extent, physical);
+		stripe_extent++;
+	}
+
+	stripe_key.objectid = bioc->logical;
+	stripe_key.type = BTRFS_RAID_STRIPE_KEY;
+	stripe_key.offset = bioc->length;
+
+	ret = btrfs_insert_item(trans, stripe_root, &stripe_key, raid_stripe,
+				item_size);
+	if (ret) {
+		kfree(raid_stripe);
+		btrfs_abort_transaction(trans, ret);
+		return;
+	}
+
+	kfree(raid_stripe);
+}
+
+void btrfs_raid_stripe_tree_fn(struct work_struct *work)
+{
+	struct btrfs_io_context *bioc;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans = NULL;
+
+	bioc = container_of(work, struct btrfs_io_context, stripe_update_work);
+	fs_info = bioc->fs_info;
+	root = fs_info->stripe_root;
+
+	trans = btrfs_join_transaction(root);
+
+	btrfs_insert_raid_extent(trans, bioc);
+	btrfs_end_transaction(trans);
+
+	btrfs_put_bioc(bioc);
+}
diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h
new file mode 100644
index 000000000000..320a110ecc66
--- /dev/null
+++ b/fs/btrfs/raid-stripe-tree.h
@@ -0,0 +1,28 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_RAID_STRIPE_TREE_H
+#define BTRFS_RAID_STRIPE_TREE_H
+
+#include "volumes.h"
+
+void btrfs_raid_stripe_tree_fn(struct work_struct *work);
+
+static inline bool btrfs_need_stripe_tree_update(struct btrfs_io_context *bioc)
+{
+	u64 type = bioc->map_type & BTRFS_BLOCK_GROUP_TYPE_MASK;
+	u64 profile = bioc->map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+	if (!bioc->fs_info->stripe_root)
+		return false;
+
+	// for now
+	if (type != BTRFS_BLOCK_GROUP_DATA)
+		return false;
+
+	if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK)
+		return true;
+
+	return false;
+}
+
+#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3fd17e87815a..36acef2ae5d8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -33,6 +33,7 @@ 
 #include "block-group.h"
 #include "discard.h"
 #include "zoned.h"
+#include "raid-stripe-tree.h"
 
 #define BTRFS_BLOCK_GROUP_STRIPE_MASK	(BTRFS_BLOCK_GROUP_RAID0 | \
 					 BTRFS_BLOCK_GROUP_RAID10 | \
@@ -5917,6 +5918,7 @@  static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
 	bioc->fs_info = fs_info;
 	bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes);
 	bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes);
+	INIT_WORK(&bioc->stripe_update_work, btrfs_raid_stripe_tree_fn);
 
 	return bioc;
 }
@@ -6677,6 +6679,17 @@  static void btrfs_end_bio(struct bio *bio)
 		}
 	}
 
+	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+		int i;
+
+		for (i = 0; i < bioc->num_stripes; i++) {
+			if (bioc->stripes[i].dev->bdev != bio->bi_bdev)
+				continue;
+			bioc->stripes[i].physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+		}
+	}
+
+
 	if (bio == bioc->orig_bio)
 		is_orig_bio = 1;
 
@@ -6700,6 +6713,12 @@  static void btrfs_end_bio(struct bio *bio)
 			 * go over the max number of errors
 			 */
 			bio->bi_status = BLK_STS_OK;
+
+			if (btrfs_op(bio) == BTRFS_MAP_WRITE &&
+			    btrfs_need_stripe_tree_update(bioc)) {
+				btrfs_get_bioc(bioc);
+				schedule_work(&bioc->stripe_update_work);
+			}
 		}
 
 		btrfs_end_bioc(bioc, bio);
@@ -6788,6 +6807,8 @@  blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	bioc->orig_bio = first_bio;
 	bioc->private = first_bio->bi_private;
 	bioc->end_io = first_bio->bi_end_io;
+	bioc->logical = logical;
+	bioc->length = length;
 	atomic_set(&bioc->stripes_pending, bioc->num_stripes);
 
 	if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 894d289a3b50..4b4235b4432a 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -68,6 +68,9 @@  struct btrfs_io_context {
 	int mirror_num;
 	int num_tgtdevs;
 	int *tgtdev_map;
+	u64 logical;
+	u64 length;
+	struct work_struct stripe_update_work;
 	/*
 	 * logical block numbers for the start of each stripe
 	 * The last one or two are p/q.  These are sorted,