diff mbox series

block: add larger order folio instead of pages for passthrough I/O

Message ID 20241114135335.21327-1-c.gameti@samsung.com (mailing list archive)
State New
Headers show
Series block: add larger order folio instead of pages for passthrough I/O | expand

Commit Message

Chinmay Gameti Nov. 14, 2024, 1:53 p.m. UTC
This patch supports adding a large folio to bio for passthrough I/Os,
when mTHP is enabled. Similar to block I/O folio changes [1].

These changes reduce the overhead incurred by bio_map_user_iov for
passthrough I/O.

Perf diff before and after this change:

perf diff for write I/O with 64K block size:
        0.84%   -0.49%  [kernel.kallsyms]       [k] bio_map_user_iov
        1.23%           [kernel.kallsyms]       [k] bvec_try_merge_page
        0.88%           [kernel.kallsyms]       [k] bvec_try_merge_hw_page
perf diff for read I/O with 64K block size:
        1.74%   -1.1%   [kernel.kallsyms]       [k] bio_map_user_iov
        2.39%           [kernel.kallsyms]       [k] bvec_try_merge_page
        1.74%           [kernel.kallsyms]       [k] bvec_try_merge_hw_page

perf diff for write I/O with 128K block size:
        0.84%   -0.6%   [kernel.kallsyms]       [k] bio_map_user_iov
        1.49%           [kernel.kallsyms]       [k] bvec_try_merge_page
        1.19%           [kernel.kallsyms]       [k] bvec_try_merge_hw_page
perf diff for read I/O with 128K block size:
        2.35%   -1.56%  [kernel.kallsyms]       [k] bio_map_user_iov
        4.53%           [kernel.kallsyms]       [k] bvec_try_merge_page
        3.39%           [kernel.kallsyms]       [k] bvec_try_merge_hw_page

[1] commit ed9832bc08db ("block: introduce folio awareness and add a
bigger size from folio")

Signed-off-by: Chinmay Gameti <c.gameti@samsung.com>
Signed-off-by: Kundan Kumar <kundan.kumar@samsung.com>
---
 block/bio.c         |  8 ++++----
 block/blk-map.c     | 31 +++++++++++++++++++++++--------
 include/linux/bio.h |  4 ++++
 3 files changed, 31 insertions(+), 12 deletions(-)

Comments

Christoph Hellwig Nov. 14, 2024, 4:17 p.m. UTC | #1
On Thu, Nov 14, 2024 at 07:23:35PM +0530, Chinmay Gameti wrote:
> +unsigned int get_contig_folio_len(unsigned int *num_pages,
> +				  struct page **pages, unsigned int i,
> +				  struct folio *folio, size_t left,
> +				  size_t offset)

Not a good name for a non-static function (not even for a stic
one to be honest).

> @@ -313,21 +314,35 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
>  		if (unlikely(offs & queue_dma_alignment(rq->q)))
>  			j = 0;
>  		else {
> -			for (j = 0; j < npages; j++) {
> +			for (j = 0; j < npages; j += num_pages) {
>  				struct page *page = pages[j];
> -				unsigned int n = PAGE_SIZE - offs;
> +				struct folio *folio = page_folio(page);
>  				bool same_page = false;
>  
> -				if (n > bytes)
> -					n = bytes;
>  
> -				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
> -						     max_sectors, &same_page))
> +				folio_offset = ((size_t)folio_page_idx(folio,
> +						page) << PAGE_SHIFT) + offs;

I'm not sure if Jens want to rush something like this in for 6.13, but if
we're aiming for the next merge window I actually have a 3/4 done series
that rips out bio_add_hw_page and all the passthrough special casing by
simply running the 'do we need to split the bio' helper on the free-form
bio and return an error if we do.  That means all this code will go away,
and you'll automatically get all the work done for the normal path for
passthrough as well.

> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 60830a6a5939..1e5fbc875ecc 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -422,6 +422,10 @@ void __bio_add_page(struct bio *bio, struct page *page,
>  		unsigned int len, unsigned int off);
>  void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
>  			  size_t off);
> +unsigned int get_contig_folio_len(unsigned int *num_pages,
> +				  struct page **pages, unsigned int i,
> +				  struct folio *folio, size_t left,
> +				  size_t offset);

And this really should bot be in a public header.
Jens Axboe Nov. 14, 2024, 7:59 p.m. UTC | #2
On 11/14/24 9:17 AM, Christoph Hellwig wrote:
>> @@ -313,21 +314,35 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
>>  		if (unlikely(offs & queue_dma_alignment(rq->q)))
>>  			j = 0;
>>  		else {
>> -			for (j = 0; j < npages; j++) {
>> +			for (j = 0; j < npages; j += num_pages) {
>>  				struct page *page = pages[j];
>> -				unsigned int n = PAGE_SIZE - offs;
>> +				struct folio *folio = page_folio(page);
>>  				bool same_page = false;
>>  
>> -				if (n > bytes)
>> -					n = bytes;
>>  
>> -				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
>> -						     max_sectors, &same_page))
>> +				folio_offset = ((size_t)folio_page_idx(folio,
>> +						page) << PAGE_SHIFT) + offs;
> 
> I'm not sure if Jens want to rush something like this in for 6.13, but if
> we're aiming for the next merge window I actually have a 3/4 done series
> that rips out bio_add_hw_page and all the passthrough special casing by
> simply running the 'do we need to split the bio' helper on the free-form
> bio and return an error if we do.  That means all this code will go away,
> and you'll automatically get all the work done for the normal path for
> passthrough as well.

I'd rather it simmer a bit first, so I'd say we have time since 6.13 is
coming up really soon.
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 699a78c85c75..50e8b565f368 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1203,10 +1203,10 @@  static int bio_iov_add_folio(struct bio *bio, struct folio *folio, size_t len,
 	return 0;
 }
 
-static unsigned int get_contig_folio_len(unsigned int *num_pages,
-					 struct page **pages, unsigned int i,
-					 struct folio *folio, size_t left,
-					 size_t offset)
+unsigned int get_contig_folio_len(unsigned int *num_pages,
+				  struct page **pages, unsigned int i,
+				  struct folio *folio, size_t left,
+				  size_t offset)
 {
 	size_t bytes = left;
 	size_t contig_sz = min_t(size_t, PAGE_SIZE - offset, bytes);
diff --git a/block/blk-map.c b/block/blk-map.c
index b5fd1d857461..216d08c8b0de 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -295,8 +295,9 @@  static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		struct page *stack_pages[UIO_FASTIOV];
 		struct page **pages = stack_pages;
 		ssize_t bytes;
-		size_t offs;
 		int npages;
+		unsigned int num_pages;
+		size_t offs, folio_offset, len;
 
 		if (nr_vecs > ARRAY_SIZE(stack_pages))
 			pages = NULL;
@@ -313,21 +314,35 @@  static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		if (unlikely(offs & queue_dma_alignment(rq->q)))
 			j = 0;
 		else {
-			for (j = 0; j < npages; j++) {
+			for (j = 0; j < npages; j += num_pages) {
 				struct page *page = pages[j];
-				unsigned int n = PAGE_SIZE - offs;
+				struct folio *folio = page_folio(page);
 				bool same_page = false;
 
-				if (n > bytes)
-					n = bytes;
 
-				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
-						     max_sectors, &same_page))
+				folio_offset = ((size_t)folio_page_idx(folio,
+						page) << PAGE_SHIFT) + offs;
+
+				len = min(folio_size(folio) - folio_offset,
+					  (size_t)bytes);
+
+				num_pages = DIV_ROUND_UP(offs + len,
+							 PAGE_SIZE);
+
+				if (num_pages > 1)
+					len = get_contig_folio_len(
+							&num_pages, pages, j,
+							folio, (size_t)bytes,
+							offs);
+
+				if (!bio_add_hw_folio(rq->q, bio, folio, len,
+						      folio_offset, max_sectors,
+						      &same_page))
 					break;
 
 				if (same_page)
 					bio_release_page(bio, page);
-				bytes -= n;
+				bytes -= len;
 				offs = 0;
 			}
 		}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 60830a6a5939..1e5fbc875ecc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -422,6 +422,10 @@  void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
 			  size_t off);
+unsigned int get_contig_folio_len(unsigned int *num_pages,
+				  struct page **pages, unsigned int i,
+				  struct folio *folio, size_t left,
+				  size_t offset);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
 void __bio_release_pages(struct bio *bio, bool mark_dirty);