diff mbox series

[v7,1/3] bio: limit bio max size

Message ID 20210413025502.31579-2-nanich.lee@samsung.com (mailing list archive)
State New, archived
Headers show
Series limit bio max size | expand

Commit Message

Changheun Lee April 13, 2021, 2:55 a.m. UTC
bio size can grow up to 4GB when muli-page bvec is enabled.
but sometimes it would lead to inefficient behaviors.
in case of large chunk direct I/O, - 32MB chunk read in user space -
all pages for 32MB would be merged to a bio structure if the pages
physical addresses are contiguous. it makes some delay to submit
until merge complete. bio max size should be limited to a proper size.

When 32MB chunk read with direct I/O option is coming from userspace,
kernel behavior is below now in do_direct_IO() loop. it's timeline.

 | bio merge for 32MB. total 8,192 pages are merged.
 | total elapsed time is over 2ms.
 |------------------ ... ----------------------->|
                                                 | 8,192 pages merged a bio.
                                                 | at this time, first bio submit is done.
                                                 | 1 bio is split to 32 read request and issue.
                                                 |--------------->
                                                  |--------------->
                                                   |--------------->
                                                              ......
                                                                   |--------------->
                                                                    |--------------->|
                          total 19ms elapsed to complete 32MB read done from device. |

If bio max size is limited with 1MB, behavior is changed below.

 | bio merge for 1MB. 256 pages are merged for each bio.
 | total 32 bio will be made.
 | total elapsed time is over 2ms. it's same.
 | but, first bio submit timing is fast. about 100us.
 |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
      | 256 pages merged a bio.
      | at this time, first bio submit is done.
      | and 1 read request is issued for 1 bio.
      |--------------->
           |--------------->
                |--------------->
                                      ......
                                                 |--------------->
                                                  |--------------->|
        total 17ms elapsed to complete 32MB read done from device. |

As a result, read request issue timing is faster if bio max size is limited.
Current kernel behavior with multipage bvec, super large bio can be created.
And it lead to delay first I/O request issue.

Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
---
 block/bio.c            | 13 ++++++++++++-
 block/blk-settings.c   | 17 +++++++++++++++++
 include/linux/bio.h    |  4 +++-
 include/linux/blkdev.h |  4 ++++
 4 files changed, 36 insertions(+), 2 deletions(-)

Comments

Bart Van Assche April 13, 2021, 4:18 p.m. UTC | #1
On 4/12/21 7:55 PM, Changheun Lee wrote:
> +unsigned int bio_max_size(struct bio *bio)
> +{
> +	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
> +
> +	if (blk_queue_limit_bio_size(q))
> +		return blk_queue_get_max_sectors(q, bio_op(bio))
> +			<< SECTOR_SHIFT;
> +
> +	return UINT_MAX;
> +}

This patch adds an if-statement to the hot path and that may have a
slight negative performance impact. I recommend to follow the approach
of max_hw_sectors. That means removing QUEUE_FLAG_LIMIT_BIO_SIZE and to
initialize the maximum bio size to UINT_MAX in blk_set_default_limits().

Thanks,

Bart.
Changheun Lee April 15, 2021, 10:38 a.m. UTC | #2
> On 4/12/21 7:55 PM, Changheun Lee wrote:
> > +unsigned int bio_max_size(struct bio *bio)
> > +{
> > +	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
> > +
> > +	if (blk_queue_limit_bio_size(q))
> > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > +			<< SECTOR_SHIFT;
> > +
> > +	return UINT_MAX;
> > +}
> 
> This patch adds an if-statement to the hot path and that may have a
> slight negative performance impact. I recommend to follow the approach
> of max_hw_sectors. That means removing QUEUE_FLAG_LIMIT_BIO_SIZE and to
> initialize the maximum bio size to UINT_MAX in blk_set_default_limits().
> 
> Thanks,
> 
> Bart.

I modified as Bart's approach. Thanks for your advice.
It's more simple than before. I think it looks good.
Please, review below. I'll prepare new version base on this.


diff --git a/block/bio.c b/block/bio.c
index 50e579088aca..9e5061ecc317 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -255,6 +255,13 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 }
 EXPORT_SYMBOL(bio_init);
 
+unsigned int bio_max_size(struct bio *bio)
+{
+	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+
+	return q->limits.bio_max_bytes;
+}
+
 /**
  * bio_reset - reinitialize a bio
  * @bio:	bio to reset
@@ -866,7 +873,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
 		if (page_is_mergeable(bv, page, len, off, same_page)) {
-			if (bio->bi_iter.bi_size > UINT_MAX - len) {
+			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
 				*same_page = false;
 				return false;
 			}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b4aa2f37fab6..b167e8db856b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -37,6 +37,7 @@ EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
  */
 void blk_set_default_limits(struct queue_limits *lim)
 {
+	lim->bio_max_bytes = UINT_MAX;
 	lim->max_segments = BLK_MAX_SEGMENTS;
 	lim->max_discard_segments = 1;
 	lim->max_integrity_segments = 0;
@@ -167,6 +168,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 	max_sectors = round_down(max_sectors,
 				 limits->logical_block_size >> SECTOR_SHIFT);
 	limits->max_sectors = max_sectors;
+	limits->bio_max_bytes = max_sectors << SECTOR_SHIFT;
 
 	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
 }
@@ -538,6 +540,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 {
 	unsigned int top, bottom, alignment, ret = 0;
 
+	t->bio_max_bytes = min_not_zero(t->bio_max_bytes, b->bio_max_bytes);
+
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
 	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d0246c92a6e8..e5add63da3af 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
 	return NULL;
 }
 
+extern unsigned int bio_max_size(struct bio *bio);
+
 /**
  * bio_full - check if the bio is full
  * @bio:	bio to check
@@ -119,7 +121,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return true;
 
-	if (bio->bi_iter.bi_size > UINT_MAX - len)
+	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
 		return true;
 
 	return false;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 158aefae1030..c205d60ac611 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -312,6 +312,8 @@ enum blk_zoned_model {
 };
 
 struct queue_limits {
+	unsigned int		bio_max_bytes;
+
 	unsigned long		bounce_pfn;
 	unsigned long		seg_boundary_mask;
 	unsigned long		virt_boundary_mask;
Bart Van Assche April 15, 2021, 7:18 p.m. UTC | #3
On 4/15/21 3:38 AM, Changheun Lee wrote:
> @@ -167,6 +168,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
>  	max_sectors = round_down(max_sectors,
>  				 limits->logical_block_size >> SECTOR_SHIFT);
>  	limits->max_sectors = max_sectors;
> +	limits->bio_max_bytes = max_sectors << SECTOR_SHIFT;
>  
>  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
>  }

Can the new shift operation overflow? If so, how about using
check_shl_overflow()?

> @@ -538,6 +540,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>  {
>  	unsigned int top, bottom, alignment, ret = 0;
>  
> +	t->bio_max_bytes = min_not_zero(t->bio_max_bytes, b->bio_max_bytes);
> +
>  	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
>  	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
>  	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);

The above will limit bio_max_bytes for all stacked block devices, which
is something we do not want. I propose to set t->bio_max_bytes to
UINT_MAX in blk_stack_limits() and to let the stacked driver (e.g.
dm-crypt) decide whether or not to lower that value.

> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index d0246c92a6e8..e5add63da3af 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
>  	return NULL;
>  }
>  
> +extern unsigned int bio_max_size(struct bio *bio);

You may want to define bio_max_size() as an inline function in bio.h
such that no additional function calls are introduced in the hot path.

Thanks,

Bart.
Changheun Lee April 16, 2021, 5:50 a.m. UTC | #4
> On 4/15/21 3:38 AM, Changheun Lee wrote:
> > @@ -167,6 +168,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
> >  	max_sectors = round_down(max_sectors,
> >  				 limits->logical_block_size >> SECTOR_SHIFT);
> >  	limits->max_sectors = max_sectors;
> > +	limits->bio_max_bytes = max_sectors << SECTOR_SHIFT;
> >  
> >  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
> >  }
> 
> Can the new shift operation overflow? If so, how about using
> check_shl_overflow()?

OK, I'll check.

> 
> > @@ -538,6 +540,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
> >  {
> >  	unsigned int top, bottom, alignment, ret = 0;
> >  
> > +	t->bio_max_bytes = min_not_zero(t->bio_max_bytes, b->bio_max_bytes);
> > +
> >  	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
> >  	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
> >  	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
> 
> The above will limit bio_max_bytes for all stacked block devices, which
> is something we do not want. I propose to set t->bio_max_bytes to
> UINT_MAX in blk_stack_limits() and to let the stacked driver (e.g.
> dm-crypt) decide whether or not to lower that value.
> 

Actually, bio size should be limited in dm-crypt too. Because almost I/O
from user space will be gone to dm-crypt first. I/O issue timing will be
delayed if bio size is not limited in dm-crypt.
Do you have any idea to decide whether takes lower bio max size, or not
in the stacked driver?
Add a flag to decide this in driver layer like before?
Or insert code manually in each stacked driver if it is needed?

> > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > index d0246c92a6e8..e5add63da3af 100644
> > --- a/include/linux/bio.h
> > +++ b/include/linux/bio.h
> > @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
> >  	return NULL;
> >  }
> >  
> > +extern unsigned int bio_max_size(struct bio *bio);
> 
> You may want to define bio_max_size() as an inline function in bio.h
> such that no additional function calls are introduced in the hot path.

Thanks, I'll try.

> 
> Thanks,
> 
> Bart.
Bart Van Assche April 16, 2021, 3:28 p.m. UTC | #5
On 4/15/21 10:50 PM, Changheun Lee wrote:
>> On 4/15/21 3:38 AM, Changheun Lee wrote:
>>> @@ -538,6 +540,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>>  {
>>>  	unsigned int top, bottom, alignment, ret = 0;
>>>  
>>> +	t->bio_max_bytes = min_not_zero(t->bio_max_bytes, b->bio_max_bytes);
>>> +
>>>  	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
>>>  	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
>>>  	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
>>
>> The above will limit bio_max_bytes for all stacked block devices, which
>> is something we do not want. I propose to set t->bio_max_bytes to
>> UINT_MAX in blk_stack_limits() and to let the stacked driver (e.g.
>> dm-crypt) decide whether or not to lower that value.
> 
> Actually, bio size should be limited in dm-crypt too. Because almost I/O
> from user space will be gone to dm-crypt first. I/O issue timing will be
> delayed if bio size is not limited in dm-crypt.
> Do you have any idea to decide whether takes lower bio max size, or not
> in the stacked driver?
> Add a flag to decide this in driver layer like before?
> Or insert code manually in each stacked driver if it is needed?

There will be fewer stacked drivers for which the bio size has to be
limited than for which the bio size has not to be limited. Hence the
proposal to set t->bio_max_bytes to UINT_MAX in blk_stack_limits() and
to let the stacked driver (e.g. dm-crypt) decide whether or not to lower
that value.

Thanks,

Bart.
Changheun Lee April 19, 2021, 5:49 a.m. UTC | #6
> > @@ -167,6 +168,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
> >  	max_sectors = round_down(max_sectors,
> >  				 limits->logical_block_size >> SECTOR_SHIFT);
> >  	limits->max_sectors = max_sectors;
> > +	limits->bio_max_bytes = max_sectors << SECTOR_SHIFT;
> >  
> >  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
> >  }
> 
> Can the new shift operation overflow? If so, how about using
> check_shl_overflow()?

Actually, overflow might be not heppen in case of physical device.
But I modified as below. feedback about this.

@@ -168,6 +169,9 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 				 limits->logical_block_size >> SECTOR_SHIFT);
 	limits->max_sectors = max_sectors;
 
+	limits->bio_max_bytes = check_shl_overflow(max_sectors, SECTOR_SHIFT,
+		&limits->bio_max_bytes) ? UINT_MAX : max_sectors << SECTOR_SHIFT;
+
 	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);

> 
> >>> @@ -538,6 +540,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
> >>>  {
> >>>  	unsigned int top, bottom, alignment, ret = 0;
> >>>  
> >>> +	t->bio_max_bytes = min_not_zero(t->bio_max_bytes, b->bio_max_bytes);
> >>> +
> >>>  	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
> >>>  	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
> >>>  	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
> >>
> >> The above will limit bio_max_bytes for all stacked block devices, which
> >> is something we do not want. I propose to set t->bio_max_bytes to
> >> UINT_MAX in blk_stack_limits() and to let the stacked driver (e.g.
> >> dm-crypt) decide whether or not to lower that value.
> > 
> > Actually, bio size should be limited in dm-crypt too. Because almost I/O
> > from user space will be gone to dm-crypt first. I/O issue timing will be
> > delayed if bio size is not limited in dm-crypt.
> > Do you have any idea to decide whether takes lower bio max size, or not
> > in the stacked driver?
> > Add a flag to decide this in driver layer like before?
> > Or insert code manually in each stacked driver if it is needed?
> 
> There will be fewer stacked drivers for which the bio size has to be
> limited than for which the bio size has not to be limited. Hence the
> proposal to set t->bio_max_bytes to UINT_MAX in blk_stack_limits() and
> to let the stacked driver (e.g. dm-crypt) decide whether or not to lower
> that value.

I see what you said. I'll set t->bio_max_bytes to UINT_MAX in
blk_stack_limits() as you mentioned.

> 
> > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > index d0246c92a6e8..e5add63da3af 100644
> > --- a/include/linux/bio.h
> > +++ b/include/linux/bio.h
> > @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
> >  	return NULL;
> >  }
> >  
> > +extern unsigned int bio_max_size(struct bio *bio);
> 
> You may want to define bio_max_size() as an inline function in bio.h
> such that no additional function calls are introduced in the hot path.

I tried, but it is not easy. because request_queue structure of blkdev.h
should be referred in bio.h. I think it's not good to apply as a inline function.


Thanks,

Changheun Lee.
Bart Van Assche April 19, 2021, 10:16 p.m. UTC | #7
On 4/18/21 10:49 PM, Changheun Lee wrote:
>>> @@ -167,6 +168,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
>>>  	max_sectors = round_down(max_sectors,
>>>  				 limits->logical_block_size >> SECTOR_SHIFT);
>>>  	limits->max_sectors = max_sectors;
>>> +	limits->bio_max_bytes = max_sectors << SECTOR_SHIFT;
>>>  
>>>  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
>>>  }
>>
>> Can the new shift operation overflow? If so, how about using
>> check_shl_overflow()?
> 
> Actually, overflow might be not heppen in case of physical device.
> But I modified as below. feedback about this.
> 
> @@ -168,6 +169,9 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
>  				 limits->logical_block_size >> SECTOR_SHIFT);
>  	limits->max_sectors = max_sectors;
>  
> +	limits->bio_max_bytes = check_shl_overflow(max_sectors, SECTOR_SHIFT,
> +		&limits->bio_max_bytes) ? UINT_MAX : max_sectors << SECTOR_SHIFT;
> +
>  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
>  }
>  EXPORT_SYMBOL(blk_queue_max_hw_sectors);

If no overflow occurs, check_shl_overflow() stores the result in the
memory location the third argument points at. So the above expression
can be simplified into the following:

if (check_shl_overflow(max_sectors, SECTOR_SHIFT, &limits->bio_max_bytes)) {
	limits->bio_max_bytes = UINT_MAX;
}

>>> diff --git a/include/linux/bio.h b/include/linux/bio.h
>>> index d0246c92a6e8..e5add63da3af 100644
>>> --- a/include/linux/bio.h
>>> +++ b/include/linux/bio.h
>>> @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
>>>  	return NULL;
>>>  }
>>>  
>>> +extern unsigned int bio_max_size(struct bio *bio);
>>
>> You may want to define bio_max_size() as an inline function in bio.h
>> such that no additional function calls are introduced in the hot path.
> 
> I tried, but it is not easy. because request_queue structure of blkdev.h
> should be referred in bio.h. I think it's not good to apply as a inline function.

Please don't worry about this. Inlining bio_max_size() is not a big
concern to me.

Thanks,

Bart.
Changheun Lee April 20, 2021, 12:53 a.m. UTC | #8
> On 4/18/21 10:49 PM, Changheun Lee wrote:
> >>> @@ -167,6 +168,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
> >>>  	max_sectors = round_down(max_sectors,
> >>>  				 limits->logical_block_size >> SECTOR_SHIFT);
> >>>  	limits->max_sectors = max_sectors;
> >>> +	limits->bio_max_bytes = max_sectors << SECTOR_SHIFT;
> >>>  
> >>>  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
> >>>  }
> >>
> >> Can the new shift operation overflow? If so, how about using
> >> check_shl_overflow()?
> > 
> > Actually, overflow might be not heppen in case of physical device.
> > But I modified as below. feedback about this.
> > 
> > @@ -168,6 +169,9 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
> >  				 limits->logical_block_size >> SECTOR_SHIFT);
> >  	limits->max_sectors = max_sectors;
> >  
> > +	limits->bio_max_bytes = check_shl_overflow(max_sectors, SECTOR_SHIFT,
> > +		&limits->bio_max_bytes) ? UINT_MAX : max_sectors << SECTOR_SHIFT;
> > +
> >  	q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
> >  }
> >  EXPORT_SYMBOL(blk_queue_max_hw_sectors);
> 
> If no overflow occurs, check_shl_overflow() stores the result in the
> memory location the third argument points at. So the above expression
> can be simplified into the following:
> 
> if (check_shl_overflow(max_sectors, SECTOR_SHIFT, &limits->bio_max_bytes)) {
> 	limits->bio_max_bytes = UINT_MAX;
> }

OK. No problem. It might be more readable.

> 
> >>> diff --git a/include/linux/bio.h b/include/linux/bio.h
> >>> index d0246c92a6e8..e5add63da3af 100644
> >>> --- a/include/linux/bio.h
> >>> +++ b/include/linux/bio.h
> >>> @@ -106,6 +106,8 @@ static inline void *bio_data(struct bio *bio)
> >>>  	return NULL;
> >>>  }
> >>>  
> >>> +extern unsigned int bio_max_size(struct bio *bio);
> >>
> >> You may want to define bio_max_size() as an inline function in bio.h
> >> such that no additional function calls are introduced in the hot path.
> > 
> > I tried, but it is not easy. because request_queue structure of blkdev.h
> > should be referred in bio.h. I think it's not good to apply as a inline function.
> 
> Please don't worry about this. Inlining bio_max_size() is not a big
> concern to me.
> 
> Thanks,
> 
> Bart.

I think removing of UINT_MAX setting in blk_stack_limits() might be good.
Because default queue limits for stacking device will be set in
blk_set_stacking_limits(), and blk_set_default_limits() will be called
automatically. So setting of UINT_MAX in blk_stack_limits() is not needed.
And setting in blk_stack_limits() can overwrite bio_max_bytes as a default
after stacking driver set to proper bio_max_bytes value.


Thanks,

Changheun Lee.
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 50e579088aca..4bad97f1d37b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -255,6 +255,17 @@  void bio_init(struct bio *bio, struct bio_vec *table,
 }
 EXPORT_SYMBOL(bio_init);
 
+unsigned int bio_max_size(struct bio *bio)
+{
+	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+
+	if (blk_queue_limit_bio_size(q))
+		return blk_queue_get_max_sectors(q, bio_op(bio))
+			<< SECTOR_SHIFT;
+
+	return UINT_MAX;
+}
+
 /**
  * bio_reset - reinitialize a bio
  * @bio:	bio to reset
@@ -866,7 +877,7 @@  bool __bio_try_merge_page(struct bio *bio, struct page *page,
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
 		if (page_is_mergeable(bv, page, len, off, same_page)) {
-			if (bio->bi_iter.bi_size > UINT_MAX - len) {
+			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
 				*same_page = false;
 				return false;
 			}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b4aa2f37fab6..1d94b97cea4f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -928,6 +928,23 @@  void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 }
 EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
 
+/**
+ * blk_queue_set_limit_bio_size - set limit bio size flag
+ * @q:		the request queue for the device
+ * @limit:	limit bio size on(true), or off
+ *
+ * bio max size will be limited to queue max sectors size,
+ * if limit is true.
+ */
+void blk_queue_set_limit_bio_size(struct request_queue *q, bool limit)
+{
+	if (limit)
+		blk_queue_flag_set(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
+	else
+		blk_queue_flag_clear(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
+}
+EXPORT_SYMBOL_GPL(blk_queue_set_limit_bio_size);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d0246c92a6e8..e5add63da3af 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -106,6 +106,8 @@  static inline void *bio_data(struct bio *bio)
 	return NULL;
 }
 
+extern unsigned int bio_max_size(struct bio *bio);
+
 /**
  * bio_full - check if the bio is full
  * @bio:	bio to check
@@ -119,7 +121,7 @@  static inline bool bio_full(struct bio *bio, unsigned len)
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return true;
 
-	if (bio->bi_iter.bi_size > UINT_MAX - len)
+	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
 		return true;
 
 	return false;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 158aefae1030..c69a6ed7a189 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -617,6 +617,7 @@  struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
@@ -663,6 +664,8 @@  bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_limit_bio_size(q)	\
+	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
@@ -1183,6 +1186,7 @@  extern void blk_queue_required_elevator_features(struct request_queue *q,
 						 unsigned int features);
 extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
 					      struct device *dev);
+extern void blk_queue_set_limit_bio_size(struct request_queue *q, bool limit);
 
 /*
  * Number of physical segments as sent to the device.