diff mbox series

[v9,21/41] btrfs: use bio_add_zone_append_page for zoned btrfs

Message ID ad4c16f2fff58ea4c6bd034e782b1c354521d696.1604065695.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned block device support | expand

Commit Message

Naohiro Aota Oct. 30, 2020, 1:51 p.m. UTC
Zoned device has its own hardware restrictions e.g. max_zone_append_size
when using REQ_OP_ZONE_APPEND. To follow the restrictions, use
bio_add_zone_append_page() instead of bio_add_page(). We need target device
to use bio_add_zone_append_page(), so this commit reads the chunk
information to memoize the target device to btrfs_io_bio(bio)->device.

Currently, zoned btrfs only supports SINGLE profile. In the feature,
btrfs_io_bio can hold extent_map and check the restrictions for all the
devices the bio will be mapped.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/extent_io.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

Comments

Josef Bacik Nov. 3, 2020, 2:55 p.m. UTC | #1
On 10/30/20 9:51 AM, Naohiro Aota wrote:
> Zoned device has its own hardware restrictions e.g. max_zone_append_size
> when using REQ_OP_ZONE_APPEND. To follow the restrictions, use
> bio_add_zone_append_page() instead of bio_add_page(). We need target device
> to use bio_add_zone_append_page(), so this commit reads the chunk
> information to memoize the target device to btrfs_io_bio(bio)->device.
> 
> Currently, zoned btrfs only supports SINGLE profile. In the feature,
> btrfs_io_bio can hold extent_map and check the restrictions for all the
> devices the bio will be mapped.
> 
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>   fs/btrfs/extent_io.c | 37 ++++++++++++++++++++++++++++++++++---
>   1 file changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 17285048fb5a..764257eb658f 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3032,6 +3032,7 @@ bool btrfs_bio_add_page(struct bio *bio, struct page *page, u64 logical,
>   {
>   	sector_t sector = logical >> SECTOR_SHIFT;
>   	bool contig;
> +	int ret;
>   
>   	if (prev_bio_flags != bio_flags)
>   		return false;
> @@ -3046,7 +3047,19 @@ bool btrfs_bio_add_page(struct bio *bio, struct page *page, u64 logical,
>   	if (btrfs_bio_fits_in_stripe(page, size, bio, bio_flags))
>   		return false;
>   
> -	return bio_add_page(bio, page, size, pg_offset) == size;
> +	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
> +		struct bio orig_bio;
> +
> +		memset(&orig_bio, 0, sizeof(orig_bio));
> +		bio_copy_dev(&orig_bio, bio);
> +		bio_set_dev(bio, btrfs_io_bio(bio)->device->bdev);
> +		ret = bio_add_zone_append_page(bio, page, size, pg_offset);
> +		bio_copy_dev(bio, &orig_bio);

Why do we need this in the first place, since we're only supporting single right 
now?  latest_bdev should be the same, so this serves no purpose, right?

And if it does, we need to figure out another solution, because right now this 
leaks references to the bio->bi_blkg, each copy inc's the refcount on the blkg 
for that bio so we're going to leak blkg's like crazy here.  Thanks,

Josef
Naohiro Aota Nov. 10, 2020, 10:42 a.m. UTC | #2
On Tue, Nov 03, 2020 at 09:55:49AM -0500, Josef Bacik wrote:
>On 10/30/20 9:51 AM, Naohiro Aota wrote:
>>Zoned device has its own hardware restrictions e.g. max_zone_append_size
>>when using REQ_OP_ZONE_APPEND. To follow the restrictions, use
>>bio_add_zone_append_page() instead of bio_add_page(). We need target device
>>to use bio_add_zone_append_page(), so this commit reads the chunk
>>information to memoize the target device to btrfs_io_bio(bio)->device.
>>
>>Currently, zoned btrfs only supports SINGLE profile. In the feature,
>>btrfs_io_bio can hold extent_map and check the restrictions for all the
>>devices the bio will be mapped.
>>
>>Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
>>---
>>  fs/btrfs/extent_io.c | 37 ++++++++++++++++++++++++++++++++++---
>>  1 file changed, 34 insertions(+), 3 deletions(-)
>>
>>diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
>>index 17285048fb5a..764257eb658f 100644
>>--- a/fs/btrfs/extent_io.c
>>+++ b/fs/btrfs/extent_io.c
>>@@ -3032,6 +3032,7 @@ bool btrfs_bio_add_page(struct bio *bio, struct page *page, u64 logical,
>>  {
>>  	sector_t sector = logical >> SECTOR_SHIFT;
>>  	bool contig;
>>+	int ret;
>>  	if (prev_bio_flags != bio_flags)
>>  		return false;
>>@@ -3046,7 +3047,19 @@ bool btrfs_bio_add_page(struct bio *bio, struct page *page, u64 logical,
>>  	if (btrfs_bio_fits_in_stripe(page, size, bio, bio_flags))
>>  		return false;
>>-	return bio_add_page(bio, page, size, pg_offset) == size;
>>+	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
>>+		struct bio orig_bio;
>>+
>>+		memset(&orig_bio, 0, sizeof(orig_bio));
>>+		bio_copy_dev(&orig_bio, bio);
>>+		bio_set_dev(bio, btrfs_io_bio(bio)->device->bdev);
>>+		ret = bio_add_zone_append_page(bio, page, size, pg_offset);
>>+		bio_copy_dev(bio, &orig_bio);
>
>Why do we need this in the first place, since we're only supporting 
>single right now?  latest_bdev should be the same, so this serves no 
>purpose, right?
>
>And if it does, we need to figure out another solution, because right 
>now this leaks references to the bio->bi_blkg, each copy inc's the 
>refcount on the blkg for that bio so we're going to leak blkg's like 
>crazy here.  Thanks,
>
>Josef

True. I just tried to make it multipe device ready as much as possible.

I'll drop this part for now, and will fix this when we add multipe device
support.
diff mbox series

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 17285048fb5a..764257eb658f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3032,6 +3032,7 @@  bool btrfs_bio_add_page(struct bio *bio, struct page *page, u64 logical,
 {
 	sector_t sector = logical >> SECTOR_SHIFT;
 	bool contig;
+	int ret;
 
 	if (prev_bio_flags != bio_flags)
 		return false;
@@ -3046,7 +3047,19 @@  bool btrfs_bio_add_page(struct bio *bio, struct page *page, u64 logical,
 	if (btrfs_bio_fits_in_stripe(page, size, bio, bio_flags))
 		return false;
 
-	return bio_add_page(bio, page, size, pg_offset) == size;
+	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+		struct bio orig_bio;
+
+		memset(&orig_bio, 0, sizeof(orig_bio));
+		bio_copy_dev(&orig_bio, bio);
+		bio_set_dev(bio, btrfs_io_bio(bio)->device->bdev);
+		ret = bio_add_zone_append_page(bio, page, size, pg_offset);
+		bio_copy_dev(bio, &orig_bio);
+	} else {
+		ret = bio_add_page(bio, page, size, pg_offset);
+	}
+
+	return ret == size;
 }
 
 /*
@@ -3077,7 +3090,9 @@  static int submit_extent_page(unsigned int opf,
 	int ret = 0;
 	struct bio *bio;
 	size_t page_size = min_t(size_t, size, PAGE_SIZE);
-	struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
+	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+	struct extent_io_tree *tree = &inode->io_tree;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 
 	ASSERT(bio_ret);
 
@@ -3108,11 +3123,27 @@  static int submit_extent_page(unsigned int opf,
 	if (wbc) {
 		struct block_device *bdev;
 
-		bdev = BTRFS_I(page->mapping->host)->root->fs_info->fs_devices->latest_bdev;
+		bdev = fs_info->fs_devices->latest_bdev;
 		bio_set_dev(bio, bdev);
 		wbc_init_bio(wbc, bio);
 		wbc_account_cgroup_owner(wbc, page, page_size);
 	}
+	if (btrfs_is_zoned(fs_info) &&
+	    bio_op(bio) == REQ_OP_ZONE_APPEND) {
+		struct extent_map *em;
+		struct map_lookup *map;
+
+		em = btrfs_get_chunk_map(fs_info, offset, page_size);
+		if (IS_ERR(em))
+			return PTR_ERR(em);
+
+		map = em->map_lookup;
+		/* only support SINGLE profile for now */
+		ASSERT(map->num_stripes == 1);
+		btrfs_io_bio(bio)->device = map->stripes[0].dev;
+
+		free_extent_map(em);
+	}
 
 	*bio_ret = bio;