[V11,03/19] block: introduce bio_for_each_bvec()
diff mbox series

Message ID 20181121032327.8434-4-ming.lei@redhat.com
State New
Headers show
Series
  • block: support multi-page bvec
Related show

Commit Message

Ming Lei Nov. 21, 2018, 3:23 a.m. UTC
This helper is used for iterating over multi-page bvec for bio
split & merge code.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 include/linux/bio.h  | 25 ++++++++++++++++++++++---
 include/linux/bvec.h | 36 +++++++++++++++++++++++++++++-------
 2 files changed, 51 insertions(+), 10 deletions(-)

Comments

Christoph Hellwig Nov. 21, 2018, 1:32 p.m. UTC | #1
> +#define bio_iter_mp_iovec(bio, iter)				\
> +	segment_iter_bvec((bio)->bi_io_vec, (iter))

Besides the mp naming we'd like to get rid off there also is just
a single user of this macro, please just expand it there.

> +#define segment_iter_bvec(bvec, iter)				\
> +((struct bio_vec) {							\
> +	.bv_page	= segment_iter_page((bvec), (iter)),	\
> +	.bv_len		= segment_iter_len((bvec), (iter)),	\
> +	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
> +})

And for this one please keep the segment vs bvec versions of these
macros close together in the file please, right now it follow the
bvec_iter_bvec variant closely.

> +static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> +				      unsigned bytes, unsigned max_seg_len)
>  {
>  	iter->bi_sector += bytes >> 9;
>  
>  	if (bio_no_advance_iter(bio))
>  		iter->bi_size -= bytes;
>  	else
> -		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
> +		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
>  		/* TODO: It is reasonable to complete bio with error here. */
>  }
>  
> +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> +				    unsigned bytes)
> +{
> +	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
> +}

Btw, I think the remaining users of bio_advance_iter() in bio.h
should probably switch to using __bio_advance_iter to make them a little
more clear to read.

> +/* returns one real segment(multi-page bvec) each time */

space before the brace, please.

> +#define BVEC_MAX_LEN  ((unsigned int)-1)

>  	while (bytes) {
> +		unsigned segment_len = segment_iter_len(bv, *iter);
>  
> -		iter->bi_bvec_done += len;
> +		if (max_seg_len < BVEC_MAX_LEN)
> +			segment_len = min_t(unsigned, segment_len,
> +					    max_seg_len -
> +					    bvec_iter_offset(bv, *iter));
> +
> +		segment_len = min(bytes, segment_len);

Please stick to passing the magic zero here as can often generate more
efficient code.

Talking about efficent code - I wonder how much code size we'd save
by moving this function out of line..

But while looking over this I wonder why we even need the max_seg_len
here.  The only thing __bvec_iter_advance does it to move bi_bvec_done
and bi_idx forward, with corresponding decrements of bi_size.  As far
as I can tell the only thing that max_seg_len does is that we need
to more iterations of the while loop to archive the same thing.

And actual bvec used by the caller will be obtained using
bvec_iter_bvec or segment_iter_bvec depending on if they want multi-page
or single-page variants.
Ming Lei Nov. 21, 2018, 3:31 p.m. UTC | #2
On Wed, Nov 21, 2018 at 02:32:44PM +0100, Christoph Hellwig wrote:
> > +#define bio_iter_mp_iovec(bio, iter)				\
> > +	segment_iter_bvec((bio)->bi_io_vec, (iter))
> 
> Besides the mp naming we'd like to get rid off there also is just
> a single user of this macro, please just expand it there.

OK.

> 
> > +#define segment_iter_bvec(bvec, iter)				\
> > +((struct bio_vec) {							\
> > +	.bv_page	= segment_iter_page((bvec), (iter)),	\
> > +	.bv_len		= segment_iter_len((bvec), (iter)),	\
> > +	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
> > +})
> 
> And for this one please keep the segment vs bvec versions of these
> macros close together in the file please, right now it follow the
> bvec_iter_bvec variant closely.

OK.

> 
> > +static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> > +				      unsigned bytes, unsigned max_seg_len)
> >  {
> >  	iter->bi_sector += bytes >> 9;
> >  
> >  	if (bio_no_advance_iter(bio))
> >  		iter->bi_size -= bytes;
> >  	else
> > -		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
> > +		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
> >  		/* TODO: It is reasonable to complete bio with error here. */
> >  }
> >  
> > +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> > +				    unsigned bytes)
> > +{
> > +	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
> > +}
> 
> Btw, I think the remaining users of bio_advance_iter() in bio.h
> should probably switch to using __bio_advance_iter to make them a little
> more clear to read.

Good point.

> 
> > +/* returns one real segment(multi-page bvec) each time */
> 
> space before the brace, please.

OK.

> 
> > +#define BVEC_MAX_LEN  ((unsigned int)-1)
> 
> >  	while (bytes) {
> > +		unsigned segment_len = segment_iter_len(bv, *iter);
> >  
> > -		iter->bi_bvec_done += len;
> > +		if (max_seg_len < BVEC_MAX_LEN)
> > +			segment_len = min_t(unsigned, segment_len,
> > +					    max_seg_len -
> > +					    bvec_iter_offset(bv, *iter));
> > +
> > +		segment_len = min(bytes, segment_len);
> 
> Please stick to passing the magic zero here as can often generate more
> efficient code.

But zero may decrease the code readability. Actually the passed
'max_seg_len' is just a constant, and complier should have generated
same efficient code for any constant, either 0 or other.

> 
> Talking about efficent code - I wonder how much code size we'd save
> by moving this function out of line..

That is good point, see the following diff:

[mingl@hp kernel]$ diff -u inline.size non_inline.size
--- inline.size	2018-11-21 23:24:52.305312076 +0800
+++ non_inline.size	2018-11-21 23:24:59.908393010 +0800
@@ -1,2 +1,2 @@
    text	   data	    bss	    dec	    hex	filename
-13429213	6893922	4292692	24615827	1779b93	vmlinux.inline
+13429153	6893346	4292692	24615191	1779917	vmlinux.non_inline

vmlinux(non_inline) is built by just moving/exporting __bvec_iter_advance()
into block/bio.c.

The difference is about 276bytes.

> 
> But while looking over this I wonder why we even need the max_seg_len
> here.  The only thing __bvec_iter_advance does it to move bi_bvec_done
> and bi_idx forward, with corresponding decrements of bi_size.  As far
> as I can tell the only thing that max_seg_len does is that we need
> to more iterations of the while loop to archive the same thing.
> 
> And actual bvec used by the caller will be obtained using
> bvec_iter_bvec or segment_iter_bvec depending on if they want multi-page
> or single-page variants.

Right, we let __bvec_iter_advance() serve for both multi-page and single-page
case, then we have to tell it via one way or another, now we use the constant
of 'max_seg_len'.

Or you suggest to implement two versions of __bvec_iter_advance()?

Thanks,
Ming
Christoph Hellwig Nov. 21, 2018, 4:10 p.m. UTC | #3
On Wed, Nov 21, 2018 at 11:31:36PM +0800, Ming Lei wrote:
> > But while looking over this I wonder why we even need the max_seg_len
> > here.  The only thing __bvec_iter_advance does it to move bi_bvec_done
> > and bi_idx forward, with corresponding decrements of bi_size.  As far
> > as I can tell the only thing that max_seg_len does is that we need
> > to more iterations of the while loop to archive the same thing.
> > 
> > And actual bvec used by the caller will be obtained using
> > bvec_iter_bvec or segment_iter_bvec depending on if they want multi-page
> > or single-page variants.
> 
> Right, we let __bvec_iter_advance() serve for both multi-page and single-page
> case, then we have to tell it via one way or another, now we use the constant
> of 'max_seg_len'.
> 
> Or you suggest to implement two versions of __bvec_iter_advance()?

No - I think we can always use the code without any segment in
bvec_iter_advance.  Because bvec_iter_advance only operates on the
iteractor, the generation of an actual single-page or multi-page
bvec is left to the caller using the bvec_iter_bvec or segment_iter_bvec
helpers.  The only difference is how many bytes you can move the
iterator forward in a single loop iteration - so if you pass in
PAGE_SIZE as the max_seg_len you just will have to loop more often
for a large enough bytes, but not actually do anything different.
Christoph Hellwig Nov. 21, 2018, 5:12 p.m. UTC | #4
On Wed, Nov 21, 2018 at 05:10:25PM +0100, Christoph Hellwig wrote:
> No - I think we can always use the code without any segment in
> bvec_iter_advance.  Because bvec_iter_advance only operates on the
> iteractor, the generation of an actual single-page or multi-page
> bvec is left to the caller using the bvec_iter_bvec or segment_iter_bvec
> helpers.  The only difference is how many bytes you can move the
> iterator forward in a single loop iteration - so if you pass in
> PAGE_SIZE as the max_seg_len you just will have to loop more often
> for a large enough bytes, but not actually do anything different.

FYI, this patch reverts the max_seg_len related changes back to where
we are in mainline, and as expected everything works fine for me:

diff --git a/include/linux/bio.h b/include/linux/bio.h
index e5b975fa0558..926550ce2d21 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -137,24 +137,18 @@ static inline bool bio_full(struct bio *bio)
 	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
 		bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
 
-static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				      unsigned bytes, unsigned max_seg_len)
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				    unsigned bytes)
 {
 	iter->bi_sector += bytes >> 9;
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
 	else
-		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
+		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
 
-static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				    unsigned bytes)
-{
-	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
-}
-
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
@@ -168,7 +162,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
 		((bvl = bio_iter_mp_iovec((bio), (iter))), 1);	\
-	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, BVEC_MAX_LEN))
+	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
 
 /* returns one real segment(multi-page bvec) each time */
 #define bio_for_each_bvec(bvl, bio, iter)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index cab36d838ed0..138b4007b8f2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -25,8 +25,6 @@
 #include <linux/errno.h>
 #include <linux/mm.h>
 
-#define BVEC_MAX_LEN  ((unsigned int)-1)
-
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
  */
@@ -102,8 +100,8 @@ struct bvec_iter_all {
 	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
 })
 
-static inline bool __bvec_iter_advance(const struct bio_vec *bv,
-		struct bvec_iter *iter, unsigned bytes, unsigned max_seg_len)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
 {
 	if (WARN_ONCE(bytes > iter->bi_size,
 		     "Attempted to advance past end of bvec iter\n")) {
@@ -112,18 +110,12 @@ static inline bool __bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned segment_len = segment_iter_len(bv, *iter);
-
-		if (max_seg_len < BVEC_MAX_LEN)
-			segment_len = min_t(unsigned, segment_len,
-					    max_seg_len -
-					    bvec_iter_offset(bv, *iter));
+		unsigned iter_len = bvec_iter_len(bv, *iter);
+		unsigned len = min(bytes, iter_len);
 
-		segment_len = min(bytes, segment_len);
-
-		bytes -= segment_len;
-		iter->bi_size -= segment_len;
-		iter->bi_bvec_done += segment_len;
+		bytes -= len;
+		iter->bi_size -= len;
+		iter->bi_bvec_done += len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
@@ -157,13 +149,6 @@ static inline bool bvec_iter_rewind(const struct bio_vec *bv,
 	return true;
 }
 
-static inline bool bvec_iter_advance(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned bytes)
-{
-	return __bvec_iter_advance(bv, iter, bytes, PAGE_SIZE);
-}
-
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
Ming Lei Nov. 22, 2018, 1:17 a.m. UTC | #5
On Wed, Nov 21, 2018 at 05:10:25PM +0100, Christoph Hellwig wrote:
> On Wed, Nov 21, 2018 at 11:31:36PM +0800, Ming Lei wrote:
> > > But while looking over this I wonder why we even need the max_seg_len
> > > here.  The only thing __bvec_iter_advance does it to move bi_bvec_done
> > > and bi_idx forward, with corresponding decrements of bi_size.  As far
> > > as I can tell the only thing that max_seg_len does is that we need
> > > to more iterations of the while loop to archive the same thing.
> > > 
> > > And actual bvec used by the caller will be obtained using
> > > bvec_iter_bvec or segment_iter_bvec depending on if they want multi-page
> > > or single-page variants.
> > 
> > Right, we let __bvec_iter_advance() serve for both multi-page and single-page
> > case, then we have to tell it via one way or another, now we use the constant
> > of 'max_seg_len'.
> > 
> > Or you suggest to implement two versions of __bvec_iter_advance()?
> 
> No - I think we can always use the code without any segment in
> bvec_iter_advance.  Because bvec_iter_advance only operates on the
> iteractor, the generation of an actual single-page or multi-page
> bvec is left to the caller using the bvec_iter_bvec or segment_iter_bvec
> helpers.  The only difference is how many bytes you can move the
> iterator forward in a single loop iteration - so if you pass in
> PAGE_SIZE as the max_seg_len you just will have to loop more often
> for a large enough bytes, but not actually do anything different.

Yeah, I see that.

The difference is made by bio_iter_iovec()/bio_iter_mp_iovec() in
__bio_for_each_segment()/__bio_for_each_bvec().


Thanks,
Ming
Ming Lei Nov. 22, 2018, 10:15 a.m. UTC | #6
On Wed, Nov 21, 2018 at 06:12:17PM +0100, Christoph Hellwig wrote:
> On Wed, Nov 21, 2018 at 05:10:25PM +0100, Christoph Hellwig wrote:
> > No - I think we can always use the code without any segment in
> > bvec_iter_advance.  Because bvec_iter_advance only operates on the
> > iteractor, the generation of an actual single-page or multi-page
> > bvec is left to the caller using the bvec_iter_bvec or segment_iter_bvec
> > helpers.  The only difference is how many bytes you can move the
> > iterator forward in a single loop iteration - so if you pass in
> > PAGE_SIZE as the max_seg_len you just will have to loop more often
> > for a large enough bytes, but not actually do anything different.
> 
> FYI, this patch reverts the max_seg_len related changes back to where
> we are in mainline, and as expected everything works fine for me:
> 
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index e5b975fa0558..926550ce2d21 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -137,24 +137,18 @@ static inline bool bio_full(struct bio *bio)
>  	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
>  		bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
>  
> -static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> -				      unsigned bytes, unsigned max_seg_len)
> +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> +				    unsigned bytes)
>  {
>  	iter->bi_sector += bytes >> 9;
>  
>  	if (bio_no_advance_iter(bio))
>  		iter->bi_size -= bytes;
>  	else
> -		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
> +		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
>  		/* TODO: It is reasonable to complete bio with error here. */
>  }
>  
> -static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> -				    unsigned bytes)
> -{
> -	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
> -}
> -
>  #define __bio_for_each_segment(bvl, bio, iter, start)			\
>  	for (iter = (start);						\
>  	     (iter).bi_size &&						\
> @@ -168,7 +162,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
>  	for (iter = (start);						\
>  	     (iter).bi_size &&						\
>  		((bvl = bio_iter_mp_iovec((bio), (iter))), 1);	\
> -	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, BVEC_MAX_LEN))
> +	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
>  
>  /* returns one real segment(multi-page bvec) each time */
>  #define bio_for_each_bvec(bvl, bio, iter)			\
> diff --git a/include/linux/bvec.h b/include/linux/bvec.h
> index cab36d838ed0..138b4007b8f2 100644
> --- a/include/linux/bvec.h
> +++ b/include/linux/bvec.h
> @@ -25,8 +25,6 @@
>  #include <linux/errno.h>
>  #include <linux/mm.h>
>  
> -#define BVEC_MAX_LEN  ((unsigned int)-1)
> -
>  /*
>   * was unsigned short, but we might as well be ready for > 64kB I/O pages
>   */
> @@ -102,8 +100,8 @@ struct bvec_iter_all {
>  	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
>  })
>  
> -static inline bool __bvec_iter_advance(const struct bio_vec *bv,
> -		struct bvec_iter *iter, unsigned bytes, unsigned max_seg_len)
> +static inline bool bvec_iter_advance(const struct bio_vec *bv,
> +		struct bvec_iter *iter, unsigned bytes)
>  {
>  	if (WARN_ONCE(bytes > iter->bi_size,
>  		     "Attempted to advance past end of bvec iter\n")) {
> @@ -112,18 +110,12 @@ static inline bool __bvec_iter_advance(const struct bio_vec *bv,
>  	}
>  
>  	while (bytes) {
> -		unsigned segment_len = segment_iter_len(bv, *iter);
> -
> -		if (max_seg_len < BVEC_MAX_LEN)
> -			segment_len = min_t(unsigned, segment_len,
> -					    max_seg_len -
> -					    bvec_iter_offset(bv, *iter));
> +		unsigned iter_len = bvec_iter_len(bv, *iter);
> +		unsigned len = min(bytes, iter_len);

It may not work to always use bvec_iter_len() here, and 'segment_len'
should be max length of the passed 'bv', however we don't know if it is
single-page or mutli-page bvec if no one tells us.

Thanks,
Ming
Christoph Hellwig Nov. 22, 2018, 10:23 a.m. UTC | #7
On Thu, Nov 22, 2018 at 06:15:28PM +0800, Ming Lei wrote:
> >  	while (bytes) {
> > -		unsigned segment_len = segment_iter_len(bv, *iter);
> > -
> > -		if (max_seg_len < BVEC_MAX_LEN)
> > -			segment_len = min_t(unsigned, segment_len,
> > -					    max_seg_len -
> > -					    bvec_iter_offset(bv, *iter));
> > +		unsigned iter_len = bvec_iter_len(bv, *iter);
> > +		unsigned len = min(bytes, iter_len);
> 
> It may not work to always use bvec_iter_len() here, and 'segment_len'
> should be max length of the passed 'bv', however we don't know if it is
> single-page or mutli-page bvec if no one tells us.

The plain revert I sent isn't totally correct in your current tree
indeed because of the odd change that segment_iter_len now is the
full bvec len, so it should be changed to that until we switch to the
sane naming scheme used elsewhere.

But except for that, it will work.  The bvec we operate on (part of the
array in the bio pointed to by the iter) is always a multi-page capable
one. We only fake up a single page on for users using segment_iter_len.

Think of what you are doing in the (I think mostly hypothetical) case
that we call bvec_iter_advance with a bytes > PAGE_SIZE on a segment
small than page size.

In this case we will limit the current iteration to the while loop
until we git a page boundary.  This means we will not hit the condition
moving to the next (real, in the array) bvec at the end of the loop,
and just go on into the next iteration of the loop with the reduced
amount of bytes.  Depending on how large byte is this might repeat
a few more times.  Then on the final one we hit the limit and move
to the next (real, in the array) bvec.

Now if we don't have the segment limit we do exactly the same thing,
just a whole lot more efficiently in a single loop iteration.
tree where segment_iter_len is the
Christoph Hellwig Nov. 22, 2018, 10:30 a.m. UTC | #8
Btw, this patch instead of the plain rever might make it a little
more clear what is going on by skipping the confusing helper altogher
and operating on the raw bvec array:


diff --git a/include/linux/bio.h b/include/linux/bio.h
index e5b975fa0558..926550ce2d21 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -137,24 +137,18 @@ static inline bool bio_full(struct bio *bio)
 	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
 		bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
 
-static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				      unsigned bytes, unsigned max_seg_len)
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				    unsigned bytes)
 {
 	iter->bi_sector += bytes >> 9;
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
 	else
-		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
+		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
 
-static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				    unsigned bytes)
-{
-	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
-}
-
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
@@ -168,7 +162,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
 		((bvl = bio_iter_mp_iovec((bio), (iter))), 1);	\
-	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, BVEC_MAX_LEN))
+	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
 
 /* returns one real segment(multi-page bvec) each time */
 #define bio_for_each_bvec(bvl, bio, iter)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index cab36d838ed0..7d0f9bdb6f05 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -25,8 +25,6 @@
 #include <linux/errno.h>
 #include <linux/mm.h>
 
-#define BVEC_MAX_LEN  ((unsigned int)-1)
-
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
  */
@@ -102,8 +100,8 @@ struct bvec_iter_all {
 	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
 })
 
-static inline bool __bvec_iter_advance(const struct bio_vec *bv,
-		struct bvec_iter *iter, unsigned bytes, unsigned max_seg_len)
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
 {
 	if (WARN_ONCE(bytes > iter->bi_size,
 		     "Attempted to advance past end of bvec iter\n")) {
@@ -112,20 +110,15 @@ static inline bool __bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned segment_len = segment_iter_len(bv, *iter);
-
-		if (max_seg_len < BVEC_MAX_LEN)
-			segment_len = min_t(unsigned, segment_len,
-					    max_seg_len -
-					    bvec_iter_offset(bv, *iter));
+		const struct bio_vec *cur = bv + iter->bi_idx;
+		unsigned len = min3(bytes, iter->bi_size,
+				    cur->bv_len - iter->bi_bvec_done);
 
-		segment_len = min(bytes, segment_len);
-
-		bytes -= segment_len;
-		iter->bi_size -= segment_len;
-		iter->bi_bvec_done += segment_len;
+		bytes -= len;
+		iter->bi_size -= len;
+		iter->bi_bvec_done += len;
 
-		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+		if (iter->bi_bvec_done == cur->bv_len) {
 			iter->bi_bvec_done = 0;
 			iter->bi_idx++;
 		}
@@ -157,13 +150,6 @@ static inline bool bvec_iter_rewind(const struct bio_vec *bv,
 	return true;
 }
 
-static inline bool bvec_iter_advance(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned bytes)
-{
-	return __bvec_iter_advance(bv, iter, bytes, PAGE_SIZE);
-}
-
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
Ming Lei Nov. 22, 2018, 2:57 p.m. UTC | #9
On Thu, Nov 22, 2018 at 11:30:33AM +0100, Christoph Hellwig wrote:
> Btw, this patch instead of the plain rever might make it a little
> more clear what is going on by skipping the confusing helper altogher
> and operating on the raw bvec array:
> 
> 
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index e5b975fa0558..926550ce2d21 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -137,24 +137,18 @@ static inline bool bio_full(struct bio *bio)
>  	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
>  		bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
>  
> -static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> -				      unsigned bytes, unsigned max_seg_len)
> +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> +				    unsigned bytes)
>  {
>  	iter->bi_sector += bytes >> 9;
>  
>  	if (bio_no_advance_iter(bio))
>  		iter->bi_size -= bytes;
>  	else
> -		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
> +		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
>  		/* TODO: It is reasonable to complete bio with error here. */
>  }
>  
> -static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
> -				    unsigned bytes)
> -{
> -	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
> -}
> -
>  #define __bio_for_each_segment(bvl, bio, iter, start)			\
>  	for (iter = (start);						\
>  	     (iter).bi_size &&						\
> @@ -168,7 +162,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
>  	for (iter = (start);						\
>  	     (iter).bi_size &&						\
>  		((bvl = bio_iter_mp_iovec((bio), (iter))), 1);	\
> -	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, BVEC_MAX_LEN))
> +	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
>  
>  /* returns one real segment(multi-page bvec) each time */
>  #define bio_for_each_bvec(bvl, bio, iter)			\
> diff --git a/include/linux/bvec.h b/include/linux/bvec.h
> index cab36d838ed0..7d0f9bdb6f05 100644
> --- a/include/linux/bvec.h
> +++ b/include/linux/bvec.h
> @@ -25,8 +25,6 @@
>  #include <linux/errno.h>
>  #include <linux/mm.h>
>  
> -#define BVEC_MAX_LEN  ((unsigned int)-1)
> -
>  /*
>   * was unsigned short, but we might as well be ready for > 64kB I/O pages
>   */
> @@ -102,8 +100,8 @@ struct bvec_iter_all {
>  	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
>  })
>  
> -static inline bool __bvec_iter_advance(const struct bio_vec *bv,
> -		struct bvec_iter *iter, unsigned bytes, unsigned max_seg_len)
> +static inline bool bvec_iter_advance(const struct bio_vec *bv,
> +		struct bvec_iter *iter, unsigned bytes)
>  {
>  	if (WARN_ONCE(bytes > iter->bi_size,
>  		     "Attempted to advance past end of bvec iter\n")) {
> @@ -112,20 +110,15 @@ static inline bool __bvec_iter_advance(const struct bio_vec *bv,
>  	}
>  
>  	while (bytes) {
> -		unsigned segment_len = segment_iter_len(bv, *iter);
> -
> -		if (max_seg_len < BVEC_MAX_LEN)
> -			segment_len = min_t(unsigned, segment_len,
> -					    max_seg_len -
> -					    bvec_iter_offset(bv, *iter));
> +		const struct bio_vec *cur = bv + iter->bi_idx;
> +		unsigned len = min3(bytes, iter->bi_size,
> +				    cur->bv_len - iter->bi_bvec_done);
>  
> -		segment_len = min(bytes, segment_len);
> -
> -		bytes -= segment_len;
> -		iter->bi_size -= segment_len;
> -		iter->bi_bvec_done += segment_len;
> +		bytes -= len;
> +		iter->bi_size -= len;
> +		iter->bi_bvec_done += len;
>  
> -		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
> +		if (iter->bi_bvec_done == cur->bv_len) {
>  			iter->bi_bvec_done = 0;
>  			iter->bi_idx++;
>  		}

I'd rather not do the optimization part in this patchset, given it doesn't
belong to this patchset, and it may decrease readability. So I plan to revert
the delta part in V12 first.

Thanks,
Ming

Patch
diff mbox series

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 056fb627edb3..7560209d6a8a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -76,6 +76,9 @@ 
 #define bio_data_dir(bio) \
 	(op_is_write(bio_op(bio)) ? WRITE : READ)
 
+#define bio_iter_mp_iovec(bio, iter)				\
+	segment_iter_bvec((bio)->bi_io_vec, (iter))
+
 /*
  * Check whether this bio carries any data or not. A NULL bio is allowed.
  */
@@ -135,18 +138,24 @@  static inline bool bio_full(struct bio *bio)
 #define bio_for_each_segment_all(bvl, bio, i)				\
 	for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
 
-static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				    unsigned bytes)
+static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				      unsigned bytes, unsigned max_seg_len)
 {
 	iter->bi_sector += bytes >> 9;
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
 	else
-		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, max_seg_len);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
 
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				    unsigned bytes)
+{
+	__bio_advance_iter(bio, iter, bytes, PAGE_SIZE);
+}
+
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
@@ -156,6 +165,16 @@  static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 #define bio_for_each_segment(bvl, bio, iter)				\
 	__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
 
+#define __bio_for_each_bvec(bvl, bio, iter, start)		\
+	for (iter = (start);						\
+	     (iter).bi_size &&						\
+		((bvl = bio_iter_mp_iovec((bio), (iter))), 1);	\
+	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, BVEC_MAX_LEN))
+
+/* returns one real segment(multi-page bvec) each time */
+#define bio_for_each_bvec(bvl, bio, iter)			\
+	__bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter)
+
 #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
 
 static inline unsigned bio_segments(struct bio *bio)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ed90bbf4c9c9..b279218c5c4d 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -25,6 +25,8 @@ 
 #include <linux/errno.h>
 #include <linux/mm.h>
 
+#define BVEC_MAX_LEN  ((unsigned int)-1)
+
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
  */
@@ -87,8 +89,15 @@  struct bvec_iter {
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline bool bvec_iter_advance(const struct bio_vec *bv,
-		struct bvec_iter *iter, unsigned bytes)
+#define segment_iter_bvec(bvec, iter)				\
+((struct bio_vec) {							\
+	.bv_page	= segment_iter_page((bvec), (iter)),	\
+	.bv_len		= segment_iter_len((bvec), (iter)),	\
+	.bv_offset	= segment_iter_offset((bvec), (iter)),	\
+})
+
+static inline bool __bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes, unsigned max_seg_len)
 {
 	if (WARN_ONCE(bytes > iter->bi_size,
 		     "Attempted to advance past end of bvec iter\n")) {
@@ -97,12 +106,18 @@  static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned iter_len = bvec_iter_len(bv, *iter);
-		unsigned len = min(bytes, iter_len);
+		unsigned segment_len = segment_iter_len(bv, *iter);
 
-		bytes -= len;
-		iter->bi_size -= len;
-		iter->bi_bvec_done += len;
+		if (max_seg_len < BVEC_MAX_LEN)
+			segment_len = min_t(unsigned, segment_len,
+					    max_seg_len -
+					    bvec_iter_offset(bv, *iter));
+
+		segment_len = min(bytes, segment_len);
+
+		bytes -= segment_len;
+		iter->bi_size -= segment_len;
+		iter->bi_bvec_done += segment_len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
@@ -136,6 +151,13 @@  static inline bool bvec_iter_rewind(const struct bio_vec *bv,
 	return true;
 }
 
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+				     struct bvec_iter *iter,
+				     unsigned bytes)
+{
+	return __bvec_iter_advance(bv, iter, bytes, PAGE_SIZE);
+}
+
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\