[2/6] Btrfs: use bio_clone_bioset_partial to simplify DIO submit
diff mbox

Message ID 1492478187-24875-3-git-send-email-bo.li.liu@oracle.com
State New
Headers show

Commit Message

Liu Bo April 18, 2017, 1:16 a.m. UTC
Currently when mapping bio to limit bio to a single stripe length, we
split bio by adding page to bio one by one, but later we don't modify
the vector of bio at all, thus we can use bio_clone_fast to use the
original bio vector directly.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/extent_io.c |  15 +++++++
 fs/btrfs/extent_io.h |   1 +
 fs/btrfs/inode.c     | 122 +++++++++++++++++++--------------------------------
 3 files changed, 62 insertions(+), 76 deletions(-)

Comments

David Sterba May 11, 2017, 2:16 p.m. UTC | #1
On Mon, Apr 17, 2017 at 06:16:23PM -0700, Liu Bo wrote:
> Currently when mapping bio to limit bio to a single stripe length, we
> split bio by adding page to bio one by one, but later we don't modify
> the vector of bio at all, thus we can use bio_clone_fast to use the
> original bio vector directly.
> 
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
>  fs/btrfs/extent_io.c |  15 +++++++
>  fs/btrfs/extent_io.h |   1 +
>  fs/btrfs/inode.c     | 122 +++++++++++++++++++--------------------------------
>  3 files changed, 62 insertions(+), 76 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 0d4aea4..1b7156c 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2726,6 +2726,21 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
>  	return bio;
>  }
>  
> +struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size)
> +{
> +	struct bio *bio;
> +
> +	bio = bio_clone_fast(orig, gfp_mask, btrfs_bioset);
> +	if (bio) {

Please switch that to

	bio = ...;
	if (!bio)
		return NULL;

	(the rest)

	return bio;

> +		struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
> +		btrfs_bio->csum = NULL;
> +		btrfs_bio->csum_allocated = NULL;
> +		btrfs_bio->end_io = NULL;
> +
> +		bio_trim(bio, (offset >> 9), (size >> 9));

Hm, so bio_trim also uses ints for the parameters, let's stick to that.

> +	}
> +	return bio;
> +}
>  
>  static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
>  				       unsigned long bio_flags)
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 3e4fad4..3b2bc88 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -460,6 +460,7 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
>  		gfp_t gfp_flags);
>  struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
>  struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
> +struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size);

line over 80 chars

>  
>  struct btrfs_fs_info;
>  struct btrfs_inode;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a18510b..6215720 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8230,16 +8230,6 @@ static void btrfs_end_dio_bio(struct bio *bio)
>  	bio_put(bio);
>  }
>  
> -static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
> -				       u64 first_sector, gfp_t gfp_flags)
> -{
> -	struct bio *bio;
> -	bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
> -	if (bio)
> -		bio_associate_current(bio);
> -	return bio;
> -}
> -
>  static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
>  						 struct btrfs_dio_private *dip,
>  						 struct bio *bio,
> @@ -8329,24 +8319,22 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
>  	struct btrfs_root *root = BTRFS_I(inode)->root;
>  	struct bio *bio;
>  	struct bio *orig_bio = dip->orig_bio;
> -	struct bio_vec *bvec;
>  	u64 start_sector = orig_bio->bi_iter.bi_sector;
>  	u64 file_offset = dip->logical_offset;
> -	u64 submit_len = 0;
>  	u64 map_length;
> -	u32 blocksize = fs_info->sectorsize;
>  	int async_submit = 0;
> -	int nr_sectors;
> +	int submit_len;
> +	int clone_offset = 0;
> +	int clone_len;
>  	int ret;
> -	int i, j;
>  
> -	map_length = orig_bio->bi_iter.bi_size;
> +	submit_len = map_length = orig_bio->bi_iter.bi_size;

Please do 2 separate initialization statements.

>  	ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
>  			      &map_length, NULL, 0);
>  	if (ret)
>  		return -EIO;
>  
> -	if (map_length >= orig_bio->bi_iter.bi_size) {
> +	if (map_length >= submit_len) {
>  		bio = orig_bio;
>  		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
>  		goto submit;
> @@ -8358,70 +8346,52 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
>  	else
>  		async_submit = 1;
>  
> -	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
> -	if (!bio)
> -		return -ENOMEM;
> -
> -	bio->bi_opf = orig_bio->bi_opf;
> -	bio->bi_private = dip;
> -	bio->bi_end_io = btrfs_end_dio_bio;
> -	btrfs_io_bio(bio)->logical = file_offset;
> +	/* bio split */
>  	atomic_inc(&dip->pending_bios);
> +	while (submit_len > 0) {
> +		/* map_length < submit_len, it's a int */
> +		clone_len = min(submit_len, (int)map_length);

The types are mixed, map_length is u64 and cannot be easily switched to
int (cascading change to several btrfs functions). The other way would
require similar changes outside of btrfs.

At least please use min_t here. I'd rather see some sanity check
regarding silent trimming of map_length than just relying on the output
of btrfs_map_block.

> +		bio = btrfs_bio_clone_partial(orig_bio, GFP_NOFS, clone_offset, clone_len);
> +		if (!bio)
> +			goto out_err;
> +		/* the above clone call also clone blkcg of orig_bio */
> +
> +		bio->bi_private = dip;
> +		bio->bi_end_io = btrfs_end_dio_bio;
> +		btrfs_io_bio(bio)->logical = file_offset;
> +
> +		ASSERT(submit_len >= clone_len);
> +		submit_len -= clone_len;
> +		if (submit_len == 0)
> +			break;
>  
> -	bio_for_each_segment_all(bvec, orig_bio, j) {
> -		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
> -		i = 0;
> -next_block:
> -		if (unlikely(map_length < submit_len + blocksize ||
> -		    bio_add_page(bio, bvec->bv_page, blocksize,
> -			    bvec->bv_offset + (i * blocksize)) < blocksize)) {
> -			/*
> -			 * inc the count before we submit the bio so
> -			 * we know the end IO handler won't happen before
> -			 * we inc the count. Otherwise, the dip might get freed
> -			 * before we're done setting it up
> -			 */
> -			atomic_inc(&dip->pending_bios);
> -			ret = __btrfs_submit_dio_bio(bio, inode,
> -						     file_offset, skip_sum,
> -						     async_submit);
> -			if (ret) {
> -				bio_put(bio);
> -				atomic_dec(&dip->pending_bios);
> -				goto out_err;
> -			}
> -
> -			start_sector += submit_len >> 9;
> -			file_offset += submit_len;
> -
> -			submit_len = 0;
> +		/*
> +		 * increase the count before we submit the bio so we know the
> +		 * end IO handler won't happen before we increase the
> +		 * count. Otherwise, the dip might get freed before we're done
> +		 * setting it up.

Small nit, as you already reformat and improve the comment, "Increase
the count ..."

> +		 */
> +		atomic_inc(&dip->pending_bios);
>  
> -			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
> -						  start_sector, GFP_NOFS);
> -			if (!bio)
> -				goto out_err;
> -			bio->bi_opf = orig_bio->bi_opf;
> -			bio->bi_private = dip;
> -			bio->bi_end_io = btrfs_end_dio_bio;
> -			btrfs_io_bio(bio)->logical = file_offset;
> +		ret = __btrfs_submit_dio_bio(bio, inode,
> +					     file_offset, skip_sum,
> +					     async_submit);

Also here, indentation level is removed, the arguments can be
reformated.

> +		if (ret) {
> +			bio_put(bio);
> +			atomic_dec(&dip->pending_bios);
> +			goto out_err;
> +		}
>  
> -			map_length = orig_bio->bi_iter.bi_size;
> -			ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
> -					      start_sector << 9,
> -					      &map_length, NULL, 0);
> -			if (ret) {
> -				bio_put(bio);
> -				goto out_err;
> -			}
> +		clone_offset += clone_len;
> +		start_sector += clone_len >> 9;
> +		file_offset += clone_len;
>  
> -			goto next_block;
> -		} else {
> -			submit_len += blocksize;
> -			if (--nr_sectors) {
> -				i++;
> -				goto next_block;
> -			}
> -		}
> +		map_length = submit_len;
> +		ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
> +				      (start_sector << 9),
> +				      &map_length, NULL, 0);
> +		if (ret)
> +			goto out_err;
>  	}

So, I think I understand the change, at least enough to make me
comfortable to put the series to for-next, once you update it. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 16, 2017, 2:37 p.m. UTC | #2
>  }
>  
> +struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size)
> +{
> +	struct bio *bio;
> +
> +	bio = bio_clone_fast(orig, gfp_mask, btrfs_bioset);
> +	if (bio) {

bio_clone_fast will never fail when backed by a bioset, which this
one always is.  Also you always pass GFP_NPFS as the gfp_mask argument,
it might make sense to hardcode that here.

> +		struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
> +		btrfs_bio->csum = NULL;
> +		btrfs_bio->csum_allocated = NULL;
> +		btrfs_bio->end_io = NULL;
> +
> +		bio_trim(bio, (offset >> 9), (size >> 9));

No need for the inner braces here.

Last but not least do you even need this as a separate helper?

> +struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size);

Over long line, please trim to 80 characters
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liu Bo May 16, 2017, 5:15 p.m. UTC | #3
On Tue, May 16, 2017 at 07:37:37AM -0700, Christoph Hellwig wrote:
> >  }
> >  
> > +struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size)
> > +{
> > +	struct bio *bio;
> > +
> > +	bio = bio_clone_fast(orig, gfp_mask, btrfs_bioset);
> > +	if (bio) {
> 
> bio_clone_fast will never fail when backed by a bioset, which this
> one always is.  Also you always pass GFP_NPFS as the gfp_mask argument,
> it might make sense to hardcode that here.
> 

I see.

> > +		struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
> > +		btrfs_bio->csum = NULL;
> > +		btrfs_bio->csum_allocated = NULL;
> > +		btrfs_bio->end_io = NULL;
> > +
> > +		bio_trim(bio, (offset >> 9), (size >> 9));
> 
> No need for the inner braces here.
> 
> Last but not least do you even need this as a separate helper?
> 

Not necessary indeed, but I need to access %btrfs_bioset which is
'static' defined in extent_io.c

> > +struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size);
> 
> Over long line, please trim to 80 characters

OK, fixed.

Thanks for the comments.

Thanks,

-liubo
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0d4aea4..1b7156c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2726,6 +2726,21 @@  struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio;
 }
 
+struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size)
+{
+	struct bio *bio;
+
+	bio = bio_clone_fast(orig, gfp_mask, btrfs_bioset);
+	if (bio) {
+		struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
+		btrfs_bio->csum = NULL;
+		btrfs_bio->csum_allocated = NULL;
+		btrfs_bio->end_io = NULL;
+
+		bio_trim(bio, (offset >> 9), (size >> 9));
+	}
+	return bio;
+}
 
 static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 				       unsigned long bio_flags)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3e4fad4..3b2bc88 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -460,6 +460,7 @@  btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
 		gfp_t gfp_flags);
 struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
 struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
+struct bio *btrfs_bio_clone_partial(struct bio *orig, gfp_t gfp_mask, int offset, int size);
 
 struct btrfs_fs_info;
 struct btrfs_inode;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a18510b..6215720 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8230,16 +8230,6 @@  static void btrfs_end_dio_bio(struct bio *bio)
 	bio_put(bio);
 }
 
-static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
-				       u64 first_sector, gfp_t gfp_flags)
-{
-	struct bio *bio;
-	bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
-	if (bio)
-		bio_associate_current(bio);
-	return bio;
-}
-
 static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode,
 						 struct btrfs_dio_private *dip,
 						 struct bio *bio,
@@ -8329,24 +8319,22 @@  static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct bio *bio;
 	struct bio *orig_bio = dip->orig_bio;
-	struct bio_vec *bvec;
 	u64 start_sector = orig_bio->bi_iter.bi_sector;
 	u64 file_offset = dip->logical_offset;
-	u64 submit_len = 0;
 	u64 map_length;
-	u32 blocksize = fs_info->sectorsize;
 	int async_submit = 0;
-	int nr_sectors;
+	int submit_len;
+	int clone_offset = 0;
+	int clone_len;
 	int ret;
-	int i, j;
 
-	map_length = orig_bio->bi_iter.bi_size;
+	submit_len = map_length = orig_bio->bi_iter.bi_size;
 	ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
 			      &map_length, NULL, 0);
 	if (ret)
 		return -EIO;
 
-	if (map_length >= orig_bio->bi_iter.bi_size) {
+	if (map_length >= submit_len) {
 		bio = orig_bio;
 		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
 		goto submit;
@@ -8358,70 +8346,52 @@  static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
 	else
 		async_submit = 1;
 
-	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
-	if (!bio)
-		return -ENOMEM;
-
-	bio->bi_opf = orig_bio->bi_opf;
-	bio->bi_private = dip;
-	bio->bi_end_io = btrfs_end_dio_bio;
-	btrfs_io_bio(bio)->logical = file_offset;
+	/* bio split */
 	atomic_inc(&dip->pending_bios);
+	while (submit_len > 0) {
+		/* map_length < submit_len, it's a int */
+		clone_len = min(submit_len, (int)map_length);
+		bio = btrfs_bio_clone_partial(orig_bio, GFP_NOFS, clone_offset, clone_len);
+		if (!bio)
+			goto out_err;
+		/* the above clone call also clone blkcg of orig_bio */
+
+		bio->bi_private = dip;
+		bio->bi_end_io = btrfs_end_dio_bio;
+		btrfs_io_bio(bio)->logical = file_offset;
+
+		ASSERT(submit_len >= clone_len);
+		submit_len -= clone_len;
+		if (submit_len == 0)
+			break;
 
-	bio_for_each_segment_all(bvec, orig_bio, j) {
-		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
-		i = 0;
-next_block:
-		if (unlikely(map_length < submit_len + blocksize ||
-		    bio_add_page(bio, bvec->bv_page, blocksize,
-			    bvec->bv_offset + (i * blocksize)) < blocksize)) {
-			/*
-			 * inc the count before we submit the bio so
-			 * we know the end IO handler won't happen before
-			 * we inc the count. Otherwise, the dip might get freed
-			 * before we're done setting it up
-			 */
-			atomic_inc(&dip->pending_bios);
-			ret = __btrfs_submit_dio_bio(bio, inode,
-						     file_offset, skip_sum,
-						     async_submit);
-			if (ret) {
-				bio_put(bio);
-				atomic_dec(&dip->pending_bios);
-				goto out_err;
-			}
-
-			start_sector += submit_len >> 9;
-			file_offset += submit_len;
-
-			submit_len = 0;
+		/*
+		 * increase the count before we submit the bio so we know the
+		 * end IO handler won't happen before we increase the
+		 * count. Otherwise, the dip might get freed before we're done
+		 * setting it up.
+		 */
+		atomic_inc(&dip->pending_bios);
 
-			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
-						  start_sector, GFP_NOFS);
-			if (!bio)
-				goto out_err;
-			bio->bi_opf = orig_bio->bi_opf;
-			bio->bi_private = dip;
-			bio->bi_end_io = btrfs_end_dio_bio;
-			btrfs_io_bio(bio)->logical = file_offset;
+		ret = __btrfs_submit_dio_bio(bio, inode,
+					     file_offset, skip_sum,
+					     async_submit);
+		if (ret) {
+			bio_put(bio);
+			atomic_dec(&dip->pending_bios);
+			goto out_err;
+		}
 
-			map_length = orig_bio->bi_iter.bi_size;
-			ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
-					      start_sector << 9,
-					      &map_length, NULL, 0);
-			if (ret) {
-				bio_put(bio);
-				goto out_err;
-			}
+		clone_offset += clone_len;
+		start_sector += clone_len >> 9;
+		file_offset += clone_len;
 
-			goto next_block;
-		} else {
-			submit_len += blocksize;
-			if (--nr_sectors) {
-				i++;
-				goto next_block;
-			}
-		}
+		map_length = submit_len;
+		ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
+				      (start_sector << 9),
+				      &map_length, NULL, 0);
+		if (ret)
+			goto out_err;
 	}
 
 submit: