Message ID | 35ea1d22a55d8dd30bc9f9dfcd4a48890bf7feaf.1652711187.git.johannes.thumshirn@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | btrfs: introduce raid-stripe-tree | expand |
On 2022/5/16 22:31, Johannes Thumshirn wrote: > Add boilerplate code to insert raid extents into the raid-stripe-tree on > each write to a RAID1 block-group. > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/Makefile | 2 +- > fs/btrfs/raid-stripe-tree.c | 72 +++++++++++++++++++++++++++++++++++++ > fs/btrfs/raid-stripe-tree.h | 28 +++++++++++++++ > fs/btrfs/volumes.c | 21 +++++++++++ > fs/btrfs/volumes.h | 3 ++ > 5 files changed, 125 insertions(+), 1 deletion(-) > create mode 100644 fs/btrfs/raid-stripe-tree.c > create mode 100644 fs/btrfs/raid-stripe-tree.h > > diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile > index 4188ba3fd8c3..6b9a00ad532a 100644 > --- a/fs/btrfs/Makefile > +++ b/fs/btrfs/Makefile > @@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ > backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ > uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ > block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ > - subpage.o tree-mod-log.o > + subpage.o tree-mod-log.o raid-stripe-tree.o > > btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o > btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o > diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c > new file mode 100644 > index 000000000000..426066bd7c0d > --- /dev/null > +++ b/fs/btrfs/raid-stripe-tree.c > @@ -0,0 +1,72 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include "ctree.h" > +#include "transaction.h" > +#include "disk-io.h" > +#include "raid-stripe-tree.h" > +#include "volumes.h" > + > +static void btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, > + struct btrfs_io_context *bioc) > +{ > + struct btrfs_fs_info *fs_info = bioc->fs_info; > + struct btrfs_key stripe_key; > + struct btrfs_root *stripe_root = fs_info->stripe_root; > + struct btrfs_dp_stripe *raid_stripe; > + struct btrfs_stripe_extent *stripe_extent; > + size_t item_size; > + int ret; > + 
int i; > + > + item_size = sizeof(struct btrfs_dp_stripe) - sizeof(struct btrfs_stripe_extent) + > + bioc->num_stripes * sizeof(struct btrfs_stripe_extent); > + > + raid_stripe = kzalloc(item_size, GFP_NOFS); > + if (!raid_stripe) { > + btrfs_abort_transaction(trans, -ENOMEM); > + return; > + } > + > + stripe_extent = &raid_stripe->extents; > + for (i = 0; i < bioc->num_stripes; i++) { > + u64 devid = bioc->stripes[i].dev->devid; > + u64 physical = bioc->stripes[i].physical; > + > + btrfs_set_stack_stripe_extent_devid(stripe_extent, devid); > + btrfs_set_stack_stripe_extent_offset(stripe_extent, physical); > + stripe_extent++; > + } > + > + stripe_key.objectid = bioc->logical; > + stripe_key.type = BTRFS_RAID_STRIPE_KEY; > + stripe_key.offset = bioc->length; > + > + ret = btrfs_insert_item(trans, stripe_root, &stripe_key, raid_stripe, > + item_size); > + if (ret) { > + kfree(raid_stripe); > + btrfs_abort_transaction(trans, ret); > + return; > + } > + > + kfree(raid_stripe); > +} > + > +void btrfs_raid_stripe_tree_fn(struct work_struct *work) > +{ > + struct btrfs_io_context *bioc; > + struct btrfs_fs_info *fs_info; > + struct btrfs_root *root; > + struct btrfs_trans_handle *trans = NULL; > + > + bioc = container_of(work, struct btrfs_io_context, stripe_update_work); > + fs_info = bioc->fs_info; > + root = fs_info->stripe_root; > + > + trans = btrfs_join_transaction(root); > + > + btrfs_insert_raid_extent(trans, bioc); > + btrfs_end_transaction(trans); > + > + btrfs_put_bioc(bioc); > +} > diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h > new file mode 100644 > index 000000000000..320a110ecc66 > --- /dev/null > +++ b/fs/btrfs/raid-stripe-tree.h > @@ -0,0 +1,28 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef BTRFS_RAID_STRIPE_TREE_H > +#define BTRFS_RAID_STRIPE_TREE_H > + > +#include "volumes.h" > + > +void btrfs_raid_stripe_tree_fn(struct work_struct *work); > + > +static inline bool btrfs_need_stripe_tree_update(struct 
btrfs_io_context *bioc) > +{ > + u64 type = bioc->map_type & BTRFS_BLOCK_GROUP_TYPE_MASK; > + u64 profile = bioc->map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK; > + > + if (!bioc->fs_info->stripe_root) > + return false; > + > + // for now > + if (type != BTRFS_BLOCK_GROUP_DATA) > + return false; OK, for now it's indeed excluding metadata/sys chunks from the stripe tree. That's fine for now. But this really brings up the bootstrap problem, so I'm afraid that we may never be able to support stripe-tree-mapped metadata/sys chunks. This also brings a new problem: if we plan to make the stripe tree work for metadata/sys, then, setting aside the bootstrap problem, we also need to determine whether the stripe tree is something global or per-chunk. a) Global switch for stripe tree If global, then every data chunk needs to be stripe-mapped, or we build a complex list of chunk types supported by the stripe tree. In fact, btrfs_need_stripe_tree_update() is currently already doing that. Without a proper on-disk indicator, we can never really provide stable stripe-tree support for other profiles. b) Per-chunk type stripe tree Then we need an extra type/flag for chunks/block groups to indicate that any read/write into the chunk needs a stripe tree update. This allows us to support different chunk types with the stripe tree, but needs a more complex on-disk change than just a simple global flag. 
Thanks, Qu > + > + if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) > + return true; > + > + return false; > +} > + > +#endif > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 3fd17e87815a..36acef2ae5d8 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -33,6 +33,7 @@ > #include "block-group.h" > #include "discard.h" > #include "zoned.h" > +#include "raid-stripe-tree.h" > > #define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ > BTRFS_BLOCK_GROUP_RAID10 | \ > @@ -5917,6 +5918,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ > bioc->fs_info = fs_info; > bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes); > bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); > + INIT_WORK(&bioc->stripe_update_work, btrfs_raid_stripe_tree_fn); > > return bioc; > } > @@ -6677,6 +6679,17 @@ static void btrfs_end_bio(struct bio *bio) > } > } > > + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { > + int i; > + > + for (i = 0; i < bioc->num_stripes; i++) { > + if (bioc->stripes[i].dev->bdev != bio->bi_bdev) > + continue; > + bioc->stripes[i].physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; > + } > + } > + > + > if (bio == bioc->orig_bio) > is_orig_bio = 1; > > @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio) > * go over the max number of errors > */ > bio->bi_status = BLK_STS_OK; > + > + if (btrfs_op(bio) == BTRFS_MAP_WRITE && > + btrfs_need_stripe_tree_update(bioc)) { > + btrfs_get_bioc(bioc); > + schedule_work(&bioc->stripe_update_work); > + } > } > > btrfs_end_bioc(bioc, bio); > @@ -6788,6 +6807,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, > bioc->orig_bio = first_bio; > bioc->private = first_bio->bi_private; > bioc->end_io = first_bio->bi_end_io; > + bioc->logical = logical; > + bioc->length = length; > atomic_set(&bioc->stripes_pending, bioc->num_stripes); > > if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && > diff --git a/fs/btrfs/volumes.h 
b/fs/btrfs/volumes.h > index 894d289a3b50..4b4235b4432a 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -68,6 +68,9 @@ struct btrfs_io_context { > int mirror_num; > int num_tgtdevs; > int *tgtdev_map; > + u64 logical; > + u64 length; > + struct work_struct stripe_update_work; > /* > * logical block numbers for the start of each stripe > * The last one or two are p/q. These are sorted,
On 2022/5/16 22:31, Johannes Thumshirn wrote: > Add boilerplate code to insert raid extents into the raid-stripe-tree on > each write to a RAID1 block-group. > > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > fs/btrfs/Makefile | 2 +- > fs/btrfs/raid-stripe-tree.c | 72 +++++++++++++++++++++++++++++++++++++ > fs/btrfs/raid-stripe-tree.h | 28 +++++++++++++++ > fs/btrfs/volumes.c | 21 +++++++++++ > fs/btrfs/volumes.h | 3 ++ > 5 files changed, 125 insertions(+), 1 deletion(-) > create mode 100644 fs/btrfs/raid-stripe-tree.c > create mode 100644 fs/btrfs/raid-stripe-tree.h > > diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile > index 4188ba3fd8c3..6b9a00ad532a 100644 > --- a/fs/btrfs/Makefile > +++ b/fs/btrfs/Makefile > @@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ > backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ > uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ > block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ > - subpage.o tree-mod-log.o > + subpage.o tree-mod-log.o raid-stripe-tree.o > > btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o > btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o > diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c > new file mode 100644 > index 000000000000..426066bd7c0d > --- /dev/null > +++ b/fs/btrfs/raid-stripe-tree.c > @@ -0,0 +1,72 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include "ctree.h" > +#include "transaction.h" > +#include "disk-io.h" > +#include "raid-stripe-tree.h" > +#include "volumes.h" > + > +static void btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, > + struct btrfs_io_context *bioc) > +{ > + struct btrfs_fs_info *fs_info = bioc->fs_info; > + struct btrfs_key stripe_key; > + struct btrfs_root *stripe_root = fs_info->stripe_root; > + struct btrfs_dp_stripe *raid_stripe; > + struct btrfs_stripe_extent *stripe_extent; > + size_t item_size; > + int ret; > + 
int i; > + > + item_size = sizeof(struct btrfs_dp_stripe) - sizeof(struct btrfs_stripe_extent) + > + bioc->num_stripes * sizeof(struct btrfs_stripe_extent); > + > + raid_stripe = kzalloc(item_size, GFP_NOFS); > + if (!raid_stripe) { > + btrfs_abort_transaction(trans, -ENOMEM); > + return; > + } > + > + stripe_extent = &raid_stripe->extents; > + for (i = 0; i < bioc->num_stripes; i++) { > + u64 devid = bioc->stripes[i].dev->devid; > + u64 physical = bioc->stripes[i].physical; > + > + btrfs_set_stack_stripe_extent_devid(stripe_extent, devid); > + btrfs_set_stack_stripe_extent_offset(stripe_extent, physical); > + stripe_extent++; > + } > + > + stripe_key.objectid = bioc->logical; > + stripe_key.type = BTRFS_RAID_STRIPE_KEY; > + stripe_key.offset = bioc->length; > + > + ret = btrfs_insert_item(trans, stripe_root, &stripe_key, raid_stripe, > + item_size); > + if (ret) { > + kfree(raid_stripe); > + btrfs_abort_transaction(trans, ret); > + return; > + } > + > + kfree(raid_stripe); > +} > + > +void btrfs_raid_stripe_tree_fn(struct work_struct *work) > +{ > + struct btrfs_io_context *bioc; > + struct btrfs_fs_info *fs_info; > + struct btrfs_root *root; > + struct btrfs_trans_handle *trans = NULL; > + > + bioc = container_of(work, struct btrfs_io_context, stripe_update_work); > + fs_info = bioc->fs_info; > + root = fs_info->stripe_root; > + > + trans = btrfs_join_transaction(root); > + > + btrfs_insert_raid_extent(trans, bioc); > + btrfs_end_transaction(trans); > + > + btrfs_put_bioc(bioc); > +} > diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h > new file mode 100644 > index 000000000000..320a110ecc66 > --- /dev/null > +++ b/fs/btrfs/raid-stripe-tree.h > @@ -0,0 +1,28 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef BTRFS_RAID_STRIPE_TREE_H > +#define BTRFS_RAID_STRIPE_TREE_H > + > +#include "volumes.h" > + > +void btrfs_raid_stripe_tree_fn(struct work_struct *work); > + > +static inline bool btrfs_need_stripe_tree_update(struct 
btrfs_io_context *bioc) > +{ > + u64 type = bioc->map_type & BTRFS_BLOCK_GROUP_TYPE_MASK; > + u64 profile = bioc->map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK; > + > + if (!bioc->fs_info->stripe_root) > + return false; > + > + // for now > + if (type != BTRFS_BLOCK_GROUP_DATA) > + return false; > + > + if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) > + return true; > + > + return false; > +} > + > +#endif > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index 3fd17e87815a..36acef2ae5d8 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -33,6 +33,7 @@ > #include "block-group.h" > #include "discard.h" > #include "zoned.h" > +#include "raid-stripe-tree.h" > > #define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ > BTRFS_BLOCK_GROUP_RAID10 | \ > @@ -5917,6 +5918,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ > bioc->fs_info = fs_info; > bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes); > bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); > + INIT_WORK(&bioc->stripe_update_work, btrfs_raid_stripe_tree_fn); > > return bioc; > } > @@ -6677,6 +6679,17 @@ static void btrfs_end_bio(struct bio *bio) > } > } > > + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { > + int i; > + > + for (i = 0; i < bioc->num_stripes; i++) { > + if (bioc->stripes[i].dev->bdev != bio->bi_bdev) > + continue; > + bioc->stripes[i].physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; > + } > + } > + > + > if (bio == bioc->orig_bio) > is_orig_bio = 1; > > @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio) > * go over the max number of errors > */ > bio->bi_status = BLK_STS_OK; > + > + if (btrfs_op(bio) == BTRFS_MAP_WRITE && > + btrfs_need_stripe_tree_update(bioc)) { > + btrfs_get_bioc(bioc); > + schedule_work(&bioc->stripe_update_work); Considering the stripe tree should be a 1:1 map for file extents, can't we do it in btrfs_finish_ordered_io()? 
Thanks, Qu > + } > } > > btrfs_end_bioc(bioc, bio); > @@ -6788,6 +6807,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, > bioc->orig_bio = first_bio; > bioc->private = first_bio->bi_private; > bioc->end_io = first_bio->bi_end_io; > + bioc->logical = logical; > + bioc->length = length; > atomic_set(&bioc->stripes_pending, bioc->num_stripes); > > if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && > diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h > index 894d289a3b50..4b4235b4432a 100644 > --- a/fs/btrfs/volumes.h > +++ b/fs/btrfs/volumes.h > @@ -68,6 +68,9 @@ struct btrfs_io_context { > int mirror_num; > int num_tgtdevs; > int *tgtdev_map; > + u64 logical; > + u64 length; > + struct work_struct stripe_update_work; > /* > * logical block numbers for the start of each stripe > * The last one or two are p/q. These are sorted,
On 17/05/2022 10:01, Qu Wenruo wrote: >> @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio) >> * go over the max number of errors >> */ >> bio->bi_status = BLK_STS_OK; >> + >> + if (btrfs_op(bio) == BTRFS_MAP_WRITE && >> + btrfs_need_stripe_tree_update(bioc)) { >> + btrfs_get_bioc(bioc); >> + schedule_work(&bioc->stripe_update_work); > Considering the stripe tree should be a 1:1 map for file extents, can't > we do it in btrfs_finish_ordered_io()? Unfortunately not at the moment. I need the stripes[] array from btrfs_io_context to record the per-disk physical locations. Another possibility would be to lift this array into btrfs_ordered_extent, then it can be done in btrfs_finish_ordered_io().
On 2022/5/17 16:05, Johannes Thumshirn wrote: > On 17/05/2022 10:01, Qu Wenruo wrote: >>> @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio) >>> * go over the max number of errors >>> */ >>> bio->bi_status = BLK_STS_OK; >>> + >>> + if (btrfs_op(bio) == BTRFS_MAP_WRITE && >>> + btrfs_need_stripe_tree_update(bioc)) { >>> + btrfs_get_bioc(bioc); >>> + schedule_work(&bioc->stripe_update_work); >> Considering the stripe tree should be a 1:1 map for file extents, can't >> we do it in btrfs_finish_ordered_io()? > > Unfortunately not at the moment. I need the stripes[] array from > btrfs_io_context to record the per-disk physical locations. Another > possibility would be to lift this array into btrfs_ordered_extent, > then it can be done in btrfs_finish_ordered_io(). At least to me, lifting it into struct btrfs_ordered_extent seems more reasonable. One problem is: if we write the stripe item to the stripe tree and the transaction gets committed, but a power loss happens before btrfs_finish_ordered_io() runs, then we would have an orphan stripe item in the stripe tree, I guess. We may then later hit EEXIST when doing other stripe tree operations. Thanks, Qu
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 4188ba3fd8c3..6b9a00ad532a 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -30,7 +30,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ - subpage.o tree-mod-log.o + subpage.o tree-mod-log.o raid-stripe-tree.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c new file mode 100644 index 000000000000..426066bd7c0d --- /dev/null +++ b/fs/btrfs/raid-stripe-tree.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "ctree.h" +#include "transaction.h" +#include "disk-io.h" +#include "raid-stripe-tree.h" +#include "volumes.h" + +static void btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, + struct btrfs_io_context *bioc) +{ + struct btrfs_fs_info *fs_info = bioc->fs_info; + struct btrfs_key stripe_key; + struct btrfs_root *stripe_root = fs_info->stripe_root; + struct btrfs_dp_stripe *raid_stripe; + struct btrfs_stripe_extent *stripe_extent; + size_t item_size; + int ret; + int i; + + item_size = sizeof(struct btrfs_dp_stripe) - sizeof(struct btrfs_stripe_extent) + + bioc->num_stripes * sizeof(struct btrfs_stripe_extent); + + raid_stripe = kzalloc(item_size, GFP_NOFS); + if (!raid_stripe) { + btrfs_abort_transaction(trans, -ENOMEM); + return; + } + + stripe_extent = &raid_stripe->extents; + for (i = 0; i < bioc->num_stripes; i++) { + u64 devid = bioc->stripes[i].dev->devid; + u64 physical = bioc->stripes[i].physical; + + btrfs_set_stack_stripe_extent_devid(stripe_extent, devid); + btrfs_set_stack_stripe_extent_offset(stripe_extent, physical); + stripe_extent++; + } + + stripe_key.objectid = bioc->logical; + stripe_key.type = 
BTRFS_RAID_STRIPE_KEY; + stripe_key.offset = bioc->length; + + ret = btrfs_insert_item(trans, stripe_root, &stripe_key, raid_stripe, + item_size); + if (ret) { + kfree(raid_stripe); + btrfs_abort_transaction(trans, ret); + return; + } + + kfree(raid_stripe); +} + +void btrfs_raid_stripe_tree_fn(struct work_struct *work) +{ + struct btrfs_io_context *bioc; + struct btrfs_fs_info *fs_info; + struct btrfs_root *root; + struct btrfs_trans_handle *trans = NULL; + + bioc = container_of(work, struct btrfs_io_context, stripe_update_work); + fs_info = bioc->fs_info; + root = fs_info->stripe_root; + + trans = btrfs_join_transaction(root); + + btrfs_insert_raid_extent(trans, bioc); + btrfs_end_transaction(trans); + + btrfs_put_bioc(bioc); +} diff --git a/fs/btrfs/raid-stripe-tree.h b/fs/btrfs/raid-stripe-tree.h new file mode 100644 index 000000000000..320a110ecc66 --- /dev/null +++ b/fs/btrfs/raid-stripe-tree.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_RAID_STRIPE_TREE_H +#define BTRFS_RAID_STRIPE_TREE_H + +#include "volumes.h" + +void btrfs_raid_stripe_tree_fn(struct work_struct *work); + +static inline bool btrfs_need_stripe_tree_update(struct btrfs_io_context *bioc) +{ + u64 type = bioc->map_type & BTRFS_BLOCK_GROUP_TYPE_MASK; + u64 profile = bioc->map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK; + + if (!bioc->fs_info->stripe_root) + return false; + + // for now + if (type != BTRFS_BLOCK_GROUP_DATA) + return false; + + if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK) + return true; + + return false; +} + +#endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3fd17e87815a..36acef2ae5d8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,6 +33,7 @@ #include "block-group.h" #include "discard.h" #include "zoned.h" +#include "raid-stripe-tree.h" #define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ BTRFS_BLOCK_GROUP_RAID10 | \ @@ -5917,6 +5918,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct 
btrfs_fs_info *fs_ bioc->fs_info = fs_info; bioc->tgtdev_map = (int *)(bioc->stripes + total_stripes); bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); + INIT_WORK(&bioc->stripe_update_work, btrfs_raid_stripe_tree_fn); return bioc; } @@ -6677,6 +6679,17 @@ static void btrfs_end_bio(struct bio *bio) } } + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + int i; + + for (i = 0; i < bioc->num_stripes; i++) { + if (bioc->stripes[i].dev->bdev != bio->bi_bdev) + continue; + bioc->stripes[i].physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; + } + } + + if (bio == bioc->orig_bio) is_orig_bio = 1; @@ -6700,6 +6713,12 @@ static void btrfs_end_bio(struct bio *bio) * go over the max number of errors */ bio->bi_status = BLK_STS_OK; + + if (btrfs_op(bio) == BTRFS_MAP_WRITE && + btrfs_need_stripe_tree_update(bioc)) { + btrfs_get_bioc(bioc); + schedule_work(&bioc->stripe_update_work); + } } btrfs_end_bioc(bioc, bio); @@ -6788,6 +6807,8 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, bioc->orig_bio = first_bio; bioc->private = first_bio->bi_private; bioc->end_io = first_bio->bi_end_io; + bioc->logical = logical; + bioc->length = length; atomic_set(&bioc->stripes_pending, bioc->num_stripes); if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 894d289a3b50..4b4235b4432a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -68,6 +68,9 @@ struct btrfs_io_context { int mirror_num; int num_tgtdevs; int *tgtdev_map; + u64 logical; + u64 length; + struct work_struct stripe_update_work; /* * logical block numbers for the start of each stripe * The last one or two are p/q. These are sorted,
Add boilerplate code to insert raid extents into the raid-stripe-tree on each write to a RAID1 block-group. Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> --- fs/btrfs/Makefile | 2 +- fs/btrfs/raid-stripe-tree.c | 72 +++++++++++++++++++++++++++++++++++++ fs/btrfs/raid-stripe-tree.h | 28 +++++++++++++++ fs/btrfs/volumes.c | 21 +++++++++++ fs/btrfs/volumes.h | 3 ++ 5 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/raid-stripe-tree.c create mode 100644 fs/btrfs/raid-stripe-tree.h