[V11,15/19] block: enable multipage bvecs
diff mbox series

Message ID 20181121032327.8434-16-ming.lei@redhat.com
State New
Headers show
Series
  • block: support multi-page bvec
Related show

Commit Message

Ming Lei Nov. 21, 2018, 3:23 a.m. UTC
This patch pulls the trigger for multi-page bvecs.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/bio.c       | 32 +++++++++++++++++++++++++++-----
 fs/iomap.c        |  2 +-
 fs/xfs/xfs_aops.c |  2 +-
 3 files changed, 29 insertions(+), 7 deletions(-)

Comments

Christoph Hellwig Nov. 21, 2018, 2:55 p.m. UTC | #1
On Wed, Nov 21, 2018 at 11:23:23AM +0800, Ming Lei wrote:
>  	if (bio->bi_vcnt > 0) {
> -		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> +		struct bio_vec bv;
> +		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
>  
> -		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
> -			bv->bv_len += len;
> +		bvec_last_segment(seg, &bv);
> +
> +		if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {

I think we can simplify the try-to-merge-into-bio case a bit,
and also document it better with something like this:

diff --git a/block/bio.c b/block/bio.c
index 854676edc438..cc913281a723 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -822,54 +822,40 @@ EXPORT_SYMBOL(bio_add_pc_page);
  * @page: page to add
  * @len: length of the data to add
  * @off: offset of the data in @page
+ * @same_page: if %true only merge if the new data is in the same physical
+ *		page as the last segment of the bio.
  *
- * Try to add the data at @page + @off to the last page of @bio.  This is a
+ * Try to add the data at @page + @off to the last bvec of @bio.  This is a
  * a useful optimisation for file systems with a block size smaller than the
  * page size.
  *
  * Return %true on success or %false on failure.
  */
 bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off)
+		unsigned int len, unsigned int off, bool same_page)
 {
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return false;
 
 	if (bio->bi_vcnt > 0) {
-		struct bio_vec bv;
-		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-		bvec_last_segment(seg, &bv);
-
-		if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
-			seg->bv_len += len;
-			bio->bi_iter.bi_size += len;
-			return true;
-		}
+		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+		phys_addr_t vec_addr = page_to_phys(bv->bv_page);
+		phys_addr_t page_addr = page_to_phys(page);
+
+		if (vec_addr + bv->bv_offset + bv->bv_len != page_addr + off)
+			return false;
+		if (same_page &&
+		    (vec_addr & PAGE_SIZE) != (page_addr & PAGE_SIZE))
+			return false;
+
+		bv->bv_len += len;
+		bio->bi_iter.bi_size += len;
+		return true;
 	}
 	return false;
 }
 EXPORT_SYMBOL_GPL(__bio_try_merge_page);
 
-static bool bio_try_merge_segment(struct bio *bio, struct page *page,
-				  unsigned int len, unsigned int off)
-{
-	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
-		return false;
-
-	if (bio->bi_vcnt > 0) {
-		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-		if (page_to_phys(seg->bv_page) + seg->bv_offset + seg->bv_len ==
-		    page_to_phys(page) + off) {
-			seg->bv_len += len;
-			bio->bi_iter.bi_size += len;
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * __bio_add_page - add page to a bio in a new segment
  * @bio: destination bio
@@ -910,7 +896,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
 int bio_add_page(struct bio *bio, struct page *page,
 		 unsigned int len, unsigned int offset)
 {
-	if (!bio_try_merge_segment(bio, page, len, offset)) {
+	if (!__bio_try_merge_page(bio, page, len, offset, false)) {
 		if (bio_full(bio))
 			return 0;
 		__bio_add_page(bio, page, len, offset);
diff --git a/fs/iomap.c b/fs/iomap.c
index ccc2ba115f4d..d918acb9bfc9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -313,7 +313,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	 */
 	sector = iomap_sector(iomap, pos);
 	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
-		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff, true))
 			goto done;
 		is_contig = true;
 	}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5c2190216614..b9fd44168f61 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -616,7 +616,7 @@ xfs_add_to_ioend(
 				bdev, sector);
 	}
 
-	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
+	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) {
 		if (iop)
 			atomic_inc(&iop->write_count);
 		if (bio_full(wpc->ioend->io_bio))
diff --git a/include/linux/bio.h b/include/linux/bio.h
index e5b975fa0558..f08e6940c1ab 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -442,7 +442,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off);
+		unsigned int len, unsigned int off, bool same_page);
 void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
Ming Lei Nov. 21, 2018, 3:48 p.m. UTC | #2
On Wed, Nov 21, 2018 at 03:55:02PM +0100, Christoph Hellwig wrote:
> On Wed, Nov 21, 2018 at 11:23:23AM +0800, Ming Lei wrote:
> >  	if (bio->bi_vcnt > 0) {
> > -		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > +		struct bio_vec bv;
> > +		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
> >  
> > -		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
> > -			bv->bv_len += len;
> > +		bvec_last_segment(seg, &bv);
> > +
> > +		if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
> 
> I think we can simplify the try-to-merge-into-bio case a bit,
> and also document it better with something like this:
> 
> diff --git a/block/bio.c b/block/bio.c
> index 854676edc438..cc913281a723 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -822,54 +822,40 @@ EXPORT_SYMBOL(bio_add_pc_page);
>   * @page: page to add
>   * @len: length of the data to add
>   * @off: offset of the data in @page
> + * @same_page: if %true only merge if the new data is in the same physical
> + *		page as the last segment of the bio.
>   *
> - * Try to add the data at @page + @off to the last page of @bio.  This is a
> + * Try to add the data at @page + @off to the last bvec of @bio.  This is a
>   * a useful optimisation for file systems with a block size smaller than the
>   * page size.
>   *
>   * Return %true on success or %false on failure.
>   */
>  bool __bio_try_merge_page(struct bio *bio, struct page *page,
> -		unsigned int len, unsigned int off)
> +		unsigned int len, unsigned int off, bool same_page)
>  {
>  	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
>  		return false;
>  
>  	if (bio->bi_vcnt > 0) {
> -		struct bio_vec bv;
> -		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
> -
> -		bvec_last_segment(seg, &bv);
> -
> -		if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
> -			seg->bv_len += len;
> -			bio->bi_iter.bi_size += len;
> -			return true;
> -		}
> +		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> +		phys_addr_t vec_addr = page_to_phys(bv->bv_page);
> +		phys_addr_t page_addr = page_to_phys(page);
> +
> +		if (vec_addr + bv->bv_offset + bv->bv_len != page_addr + off)
> +			return false;
> +		if (same_page &&
> +		    (vec_addr & PAGE_SIZE) != (page_addr & PAGE_SIZE))
> +			return false;

I guess the correct check should be:

		end_addr = vec_addr + bv->bv_offset + bv->bv_len;
		if (same_page &&
		    (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK))
			return false;

And this approach is good, will take it in V12.

Thanks,
Ming
Christoph Hellwig Nov. 21, 2018, 4:12 p.m. UTC | #3
On Wed, Nov 21, 2018 at 11:48:13PM +0800, Ming Lei wrote:
> I guess the correct check should be:
> 
> 		end_addr = vec_addr + bv->bv_offset + bv->bv_len;
> 		if (same_page &&
> 		    (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK))
> 			return false;

Indeed.
Ming Lei Nov. 23, 2018, 10:50 a.m. UTC | #4
On Wed, Nov 21, 2018 at 05:12:06PM +0100, Christoph Hellwig wrote:
> On Wed, Nov 21, 2018 at 11:48:13PM +0800, Ming Lei wrote:
> > I guess the correct check should be:
> > 
> > 		end_addr = vec_addr + bv->bv_offset + bv->bv_len;
> > 		if (same_page &&
> > 		    (end_addr & PAGE_MASK) != (page_addr & PAGE_MASK))
> > 			return false;
> 
> Indeed.

The above is still not totally correct, and it should have been:

 		end_addr = vec_addr + bv->bv_offset + bv->bv_len - 1;
 		if (same_page && (end_addr & PAGE_MASK) != page_addr)
 			return false;

Also, bv->bv_len should be guaranteed to be greater than zero.

It also shows how easy it is to get the last page wrong. :-(


Thanks,
Ming

Patch
diff mbox series

diff --git a/block/bio.c b/block/bio.c
index 0f1635b9ec50..854676edc438 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -823,7 +823,7 @@  EXPORT_SYMBOL(bio_add_pc_page);
  * @len: length of the data to add
  * @off: offset of the data in @page
  *
- * Try to add the data at @page + @off to the last bvec of @bio.  This is a
+ * Try to add the data at @page + @off to the last page of @bio.  This is a
  * a useful optimisation for file systems with a block size smaller than the
  * page size.
  *
@@ -836,10 +836,13 @@  bool __bio_try_merge_page(struct bio *bio, struct page *page,
 		return false;
 
 	if (bio->bi_vcnt > 0) {
-		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+		struct bio_vec bv;
+		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
-		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
-			bv->bv_len += len;
+		bvec_last_segment(seg, &bv);
+
+		if (page == bv.bv_page && off == bv.bv_offset + bv.bv_len) {
+			seg->bv_len += len;
 			bio->bi_iter.bi_size += len;
 			return true;
 		}
@@ -848,6 +851,25 @@  bool __bio_try_merge_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL_GPL(__bio_try_merge_page);
 
+static bool bio_try_merge_segment(struct bio *bio, struct page *page,
+				  unsigned int len, unsigned int off)
+{
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+		return false;
+
+	if (bio->bi_vcnt > 0) {
+		struct bio_vec *seg = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+		if (page_to_phys(seg->bv_page) + seg->bv_offset + seg->bv_len ==
+		    page_to_phys(page) + off) {
+			seg->bv_len += len;
+			bio->bi_iter.bi_size += len;
+			return true;
+		}
+	}
+	return false;
+}
+
 /**
  * __bio_add_page - add page to a bio in a new segment
  * @bio: destination bio
@@ -888,7 +910,7 @@  EXPORT_SYMBOL_GPL(__bio_add_page);
 int bio_add_page(struct bio *bio, struct page *page,
 		 unsigned int len, unsigned int offset)
 {
-	if (!__bio_try_merge_page(bio, page, len, offset)) {
+	if (!bio_try_merge_segment(bio, page, len, offset)) {
 		if (bio_full(bio))
 			return 0;
 		__bio_add_page(bio, page, len, offset);
diff --git a/fs/iomap.c b/fs/iomap.c
index f5fb8bf75cc8..ccc2ba115f4d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -344,7 +344,7 @@  iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		ctx->bio->bi_end_io = iomap_read_end_io;
 	}
 
-	__bio_add_page(ctx->bio, page, plen, poff);
+	bio_add_page(ctx->bio, page, plen, poff);
 done:
 	/*
 	 * Move the caller beyond our range so that it keeps making progress.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1f1829e506e8..5c2190216614 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -621,7 +621,7 @@  xfs_add_to_ioend(
 			atomic_inc(&iop->write_count);
 		if (bio_full(wpc->ioend->io_bio))
 			xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
-		__bio_add_page(wpc->ioend->io_bio, page, len, poff);
+		bio_add_page(wpc->ioend->io_bio, page, len, poff);
 	}
 
 	wpc->ioend->io_size += len;