@@ -2749,6 +2749,10 @@ static void end_bio_extent_writepage(struct bio *bio)
u64 end;
struct bvec_iter_all iter_all;
+ btrfs_record_physical_zoned(bio_iovec(bio).bv_page->mapping->host,
+ page_offset(bio_iovec(bio).bv_page) + bio_iovec(bio).bv_offset,
+ bio);
+
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment_all(bvec, bio, iter_all) {
struct page *page = bvec->bv_page;
@@ -49,6 +49,7 @@
#include "delalloc-space.h"
#include "block-group.h"
#include "space-info.h"
+#include "zoned.h"
struct btrfs_iget_args {
u64 ino;
@@ -2198,7 +2199,13 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
if (btrfs_is_free_space_inode(BTRFS_I(inode)))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
- if (bio_op(bio) != REQ_OP_WRITE) {
+ if (bio_op(bio) == REQ_OP_WRITE && btrfs_fs_incompat(fs_info, ZONED)) {
+ /* use zone append writing */
+ bio->bi_opf &= ~REQ_OP_MASK;
+ bio->bi_opf |= REQ_OP_ZONE_APPEND;
+ }
+
+ if (bio_op(bio) != REQ_OP_WRITE && bio_op(bio) != REQ_OP_ZONE_APPEND) {
ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
if (ret)
goto out;
@@ -2594,6 +2601,9 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
bool clear_reserved_extent = true;
unsigned int clear_bits;
+ if (ordered_extent->disk)
+ btrfs_rewrite_logical_zoned(ordered_extent);
+
start = ordered_extent->file_offset;
end = start + ordered_extent->num_bytes - 1;
@@ -199,6 +199,9 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
+ entry->physical = (u64)-1;
+ entry->disk = NULL;
+ entry->partno = (u8)-1;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@@ -118,6 +118,10 @@ struct btrfs_ordered_extent {
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
+
+ u64 physical;
+ struct gendisk *disk;
+ u8 partno;
};
/*
@@ -6505,6 +6505,15 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
btrfs_io_bio(bio)->device = dev;
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
+ /*
+ * For zone append writing, bi_sector must point the beginning of the
+ * zone
+ */
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ u64 zone_start = round_down(physical, fs_info->zone_size);
+
+ bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
+ }
btrfs_debug_in_rcu(fs_info,
"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
@@ -1065,3 +1065,73 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
}
spin_unlock(&trans->releasing_ebs_lock);
}
+
+void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
+ struct bio *bio)
+{
+ struct btrfs_ordered_extent *ordered;
+ struct bio_vec bvec = bio_iovec(bio);
+ u64 physical = ((u64)bio->bi_iter.bi_sector << SECTOR_SHIFT) +
+ bvec.bv_offset;
+
+ if (bio_op(bio) != REQ_OP_ZONE_APPEND)
+ return;
+
+ ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), file_offset);
+ if (WARN_ON(!ordered))
+ return;
+
+ ordered->physical = physical;
+ ordered->disk = bio->bi_disk;
+ ordered->partno = bio->bi_partno;
+
+ btrfs_put_ordered_extent(ordered);
+}
+
+void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
+{
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ struct inode *inode = ordered->inode;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_ordered_sum *sum;
+ struct block_device *bdev;
+ u64 orig_logical = ordered->disk_bytenr;
+ u64 *logical = NULL;
+ int nr, stripe_len;
+
+ bdev = bdget_disk(ordered->disk, ordered->partno);
+ if (WARN_ON(!bdev))
+ return;
+
+ if (WARN_ON(__btrfs_rmap_block(fs_info, orig_logical, bdev,
+ ordered->physical, &logical, &nr,
+ &stripe_len)))
+ goto out;
+
+ WARN_ON(nr != 1);
+
+ if (orig_logical == *logical)
+ goto out;
+
+ ordered->disk_bytenr = *logical;
+
+ em_tree = &BTRFS_I(inode)->extent_tree;
+ write_lock(&em_tree->lock);
+ em = search_extent_mapping(em_tree, ordered->file_offset,
+ ordered->num_bytes);
+ em->block_start = *logical;
+ free_extent_map(em);
+ write_unlock(&em_tree->lock);
+
+ list_for_each_entry(sum, &ordered->list, list) {
+ if (*logical < orig_logical)
+ sum->bytenr -= orig_logical - *logical;
+ else
+ sum->bytenr += *logical - orig_logical;
+ }
+
+out:
+ kfree(logical);
+ bdput(bdev);
+}
@@ -50,6 +50,9 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
void btrfs_free_redirty_list(struct btrfs_transaction *trans);
+void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
+ struct bio *bio);
+void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
@@ -121,6 +124,12 @@ static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb) { }
static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
+static inline void btrfs_record_physical_zoned(struct inode *inode,
+ u64 file_offset, struct bio *bio)
+{
+}
+static inline void
+btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) { }
#endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
This commit enables zone append writing for zoned btrfs. Three parts are necessary to enable it. First, it modifies bio to use REQ_OP_ZONE_APPEND in btrfs_submit_bio_hook() and adjust the bi_sector to point the beginning of the zone. Second, it records returned physical address (and disk/partno) to the ordered extent in end_bio_extent_writepage(). Finally, it rewrites logical addresses of the extent mapping and checksum data according to the physical address (using __btrfs_rmap_block). If the returned address match to the originaly allocated address, we can skip the rewriting process. Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> --- fs/btrfs/extent_io.c | 4 +++ fs/btrfs/inode.c | 12 ++++++- fs/btrfs/ordered-data.c | 3 ++ fs/btrfs/ordered-data.h | 4 +++ fs/btrfs/volumes.c | 9 ++++++ fs/btrfs/zoned.c | 70 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/zoned.h | 9 ++++++ 7 files changed, 110 insertions(+), 1 deletion(-)