diff mbox series

block: advance by bvec's length for bio_for_each_bvec

Message ID 20190228032421.23161-1-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: advance by bvec's length for bio_for_each_bvec | expand

Commit Message

Ming Lei Feb. 28, 2019, 3:24 a.m. UTC
bio_for_each_bvec is used in the fast path of bio splitting and sg mapping,
and what we want to do is to iterate over multi-page bvecs, instead of pages.
However, bvec_iter_advance() is unaware of this requirement, and
always advances by page size.

This way isn't efficient for the multi-page bvec iterator, and bvec_iter_len()
isn't as fast as mp_bvec_iter_len() either.

So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().

More than 1% IOPS improvement can be observed in io_uring test on null_blk.

Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 include/linux/bio.h  | 13 +++++++++----
 include/linux/bvec.h | 13 ++++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

Comments

Jens Axboe Feb. 28, 2019, 1 p.m. UTC | #1
On 2/27/19 8:24 PM, Ming Lei wrote:
> bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
> and what we want to do is to iterate over multi-page bvecs, instead of pages.
> However, bvec_iter_advance() is invisble for this requirement, and
> always advance by page size.
> 
> This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
> isn't as fast as mp_bvec_iter_len().
> 
> So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
> 
> More than 1% IOPS improvement can be observed in io_uring test on null_blk.

Thanks Ming, I tested this last night with good results.
Christoph Hellwig Feb. 28, 2019, 1:58 p.m. UTC | #2
On Thu, Feb 28, 2019 at 11:24:21AM +0800, Ming Lei wrote:
> bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
> and what we want to do is to iterate over multi-page bvecs, instead of pages.
> However, bvec_iter_advance() is invisble for this requirement, and
> always advance by page size.
> 
> This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
> isn't as fast as mp_bvec_iter_len().
> 
> So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
> 
> More than 1% IOPS improvement can be observed in io_uring test on null_blk.

We've been there before, and I still insist that there is no good
reason ever to clamp the iteration to page size in bvec_iter_advance.
Callers that need page-size iteration already handle that themselves.

So here is a resurrection and rebase of my patch from back then to
just do the right thing:

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 2c32e3e151a0..cf06c0647c4f 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -112,14 +112,15 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned iter_len = bvec_iter_len(bv, *iter);
-		unsigned len = min(bytes, iter_len);
+		const struct bio_vec *cur = bv + iter->bi_idx;
+		unsigned len = min3(bytes, iter->bi_size,
+				    cur->bv_len - iter->bi_bvec_done);
 
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
 
-		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+		if (iter->bi_bvec_done == cur->bv_len) {
 			iter->bi_bvec_done = 0;
 			iter->bi_idx++;
 		}
Ming Lei Feb. 28, 2019, 3:20 p.m. UTC | #3
On Thu, Feb 28, 2019 at 05:58:32AM -0800, Christoph Hellwig wrote:
> On Thu, Feb 28, 2019 at 11:24:21AM +0800, Ming Lei wrote:
> > bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
> > and what we want to do is to iterate over multi-page bvecs, instead of pages.
> > However, bvec_iter_advance() is invisble for this requirement, and
> > always advance by page size.
> > 
> > This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
> > isn't as fast as mp_bvec_iter_len().
> > 
> > So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
> > 
> > More than 1% IOPS improvement can be observed in io_uring test on null_blk.
> 
> We've been there before, and I still insist that there is not good
> reason ever to clamp the iteration to page size in bvec_iter_advance.
> Callers that iterate over it already do that in the callers.
> 
> So here is a resurretion and rebase of my patch from back then to
> just do the right thing:
> 
> diff --git a/include/linux/bvec.h b/include/linux/bvec.h
> index 2c32e3e151a0..cf06c0647c4f 100644
> --- a/include/linux/bvec.h
> +++ b/include/linux/bvec.h
> @@ -112,14 +112,15 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
>  	}
>  
>  	while (bytes) {
> -		unsigned iter_len = bvec_iter_len(bv, *iter);
> -		unsigned len = min(bytes, iter_len);
> +		const struct bio_vec *cur = bv + iter->bi_idx;
> +		unsigned len = min3(bytes, iter->bi_size,
> +				    cur->bv_len - iter->bi_bvec_done);
>  
>  		bytes -= len;
>  		iter->bi_size -= len;
>  		iter->bi_bvec_done += len;
>  
> -		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
> +		if (iter->bi_bvec_done == cur->bv_len) {
>  			iter->bi_bvec_done = 0;
>  			iter->bi_idx++;
>  		}

Yeah, this change is the correct thing to do, and there shouldn't be
performance drop with this patch for Jens' test case, I guess.

Thanks,
Ming
Jens Axboe Feb. 28, 2019, 3:23 p.m. UTC | #4
On 2/28/19 6:58 AM, Christoph Hellwig wrote:
> On Thu, Feb 28, 2019 at 11:24:21AM +0800, Ming Lei wrote:
>> bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
>> and what we want to do is to iterate over multi-page bvecs, instead of pages.
>> However, bvec_iter_advance() is invisble for this requirement, and
>> always advance by page size.
>>
>> This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
>> isn't as fast as mp_bvec_iter_len().
>>
>> So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
>>
>> More than 1% IOPS improvement can be observed in io_uring test on null_blk.
> 
> We've been there before, and I still insist that there is not good
> reason ever to clamp the iteration to page size in bvec_iter_advance.
> Callers that iterate over it already do that in the callers.
> 
> So here is a resurretion and rebase of my patch from back then to
> just do the right thing:

Care to resend as a proper patch?
diff mbox series

Patch

diff --git a/include/linux/bio.h b/include/linux/bio.h
index bb6090aa165d..29c7dd348dc2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -134,17 +134,22 @@  static inline bool bio_full(struct bio *bio)
 	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
 		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
 
-static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				    unsigned bytes)
+static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				      unsigned bytes, bool bvec)
 {
 	iter->bi_sector += bytes >> 9;
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
 	else
-		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, bvec);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				    unsigned bytes)
+{
+	return __bio_advance_iter(bio, iter, bytes, false);
+}
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
@@ -159,7 +164,7 @@  static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
 		((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \
-	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
+	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, true))
 
 /* iterate over multi-page bvec */
 #define bio_for_each_bvec(bvl, bio, iter)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 2c32e3e151a0..98a140fa4dac 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -102,8 +102,8 @@  static inline struct page *bvec_nth_page(struct page *page, int idx)
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline bool bvec_iter_advance(const struct bio_vec *bv,
-		struct bvec_iter *iter, unsigned bytes)
+static inline bool __bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes, bool bvec)
 {
 	if (WARN_ONCE(bytes > iter->bi_size,
 		     "Attempted to advance past end of bvec iter\n")) {
@@ -112,7 +112,8 @@  static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned iter_len = bvec_iter_len(bv, *iter);
+		unsigned iter_len = bvec ? mp_bvec_iter_len(bv, *iter) :
+			bvec_iter_len(bv, *iter);
 		unsigned len = min(bytes, iter_len);
 
 		bytes -= len;
@@ -127,6 +128,12 @@  static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	return true;
 }
 
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
+{
+	return __bvec_iter_advance(bv, iter, bytes, false);
+}
+
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\