diff mbox series

[v6,20/51] block: Add bio_for_each_thp_segment_all

Message ID 20200610201345.13273-21-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series Large pages in the page cache | expand

Commit Message

Matthew Wilcox June 10, 2020, 8:13 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Iterate once for each THP instead of once for each base page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/bio.h  | 13 +++++++++++++
 include/linux/bvec.h | 23 +++++++++++++++++++++++
 2 files changed, 36 insertions(+)

Comments

Matthew Wilcox June 11, 2020, 6:20 p.m. UTC | #1
On Wed, Jun 10, 2020 at 01:13:14PM -0700, Matthew Wilcox wrote:
> +static inline void bvec_thp_advance(const struct bio_vec *bvec,
> +				struct bvec_iter_all *iter_all)
> +{
> +	struct bio_vec *bv = &iter_all->bv;
> +	unsigned int page_size = thp_size(bvec->bv_page);
> +
> +	if (iter_all->done) {
> +		bv->bv_page += thp_nr_pages(bv->bv_page);
> +		bv->bv_offset = 0;
> +	} else {
> +		BUG_ON(bvec->bv_offset >= page_size);
> +		bv->bv_page = bvec->bv_page;
> +		bv->bv_offset = bvec->bv_offset & (page_size - 1);
> +	}
> +	bv->bv_len = min(page_size - bv->bv_offset,
> +			 bvec->bv_len - iter_all->done);
> +	iter_all->done += bv->bv_len;
> +
> +	if (iter_all->done == bvec->bv_len) {
> +		iter_all->idx++;
> +		iter_all->done = 0;
> +	}
> +}

If, for example, we have an order-2 page followed by two order-0 pages
(thanks, generic/127!) in the bvec, we'll end up skipping the third
page because we calculate the size based on bvec->bv_page instead of
bv->bv_page.

+++ b/include/linux/bvec.h
@@ -166,15 +166,19 @@ static inline void bvec_thp_advance(const struct bio_vec *bvec,
                                struct bvec_iter_all *iter_all)
 {
        struct bio_vec *bv = &iter_all->bv;
-       unsigned int page_size = thp_size(bvec->bv_page);
+       unsigned int page_size;
 
        if (iter_all->done) {
                bv->bv_page += thp_nr_pages(bv->bv_page);
+               page_size = thp_size(bv->bv_page);
                bv->bv_offset = 0;
        } else {
-               BUG_ON(bvec->bv_offset >= page_size);
-               bv->bv_page = bvec->bv_page;
-               bv->bv_offset = bvec->bv_offset & (page_size - 1);
+               bv->bv_page = thp_head(bvec->bv_page +
+                               (bvec->bv_offset >> PAGE_SHIFT));
+               page_size = thp_size(bv->bv_page);
+               bv->bv_offset = bvec->bv_offset -
+                               (bv->bv_page - bvec->bv_page) * PAGE_SIZE;
+               BUG_ON(bv->bv_offset >= page_size);
        }
        bv->bv_len = min(page_size - bv->bv_offset,
                         bvec->bv_len - iter_all->done);

The previous code also wasn't handling the case fixed in 6bedf00e55e5
where a split bio might end up splitting a bvec.  That BUG_ON can probably
come out after a few months of testing.
diff mbox series

Patch

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 91676d4b2dfe..1489e196abf5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -131,12 +131,25 @@  static inline bool bio_next_segment(const struct bio *bio,
 	return true;
 }
 
+static inline bool bio_next_thp_segment(const struct bio *bio,
+				    struct bvec_iter_all *iter)
+{
+	if (iter->idx >= bio->bi_vcnt) /* idx is at or past bi_vcnt: nothing left to visit */
+		return false;
+
+	bvec_thp_advance(&bio->bi_io_vec[iter->idx], iter); /* updates iter->bv and, once a bvec is used up, iter->idx */
+	return true;
+}
+
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
 #define bio_for_each_segment_all(bvl, bio, iter) \
 	for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); )
+#define bio_for_each_thp_segment_all(bvl, bio, iter) \
+	for (bvl = bvec_init_iter_all(&iter); \
+	     bio_next_thp_segment((bio), &iter); ) /* loop ends when bio_next_thp_segment() returns false */
 
 static inline void bio_advance_iter(const struct bio *bio,
 				    struct bvec_iter *iter, unsigned int bytes)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ac0c7299d5b8..71b435e573e1 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -162,4 +162,27 @@  static inline void bvec_advance(const struct bio_vec *bvec,
 	}
 }
 
+static inline void bvec_thp_advance(const struct bio_vec *bvec,
+				struct bvec_iter_all *iter_all)
+{
+	struct bio_vec *bv = &iter_all->bv; /* output segment lives inside the iterator */
+	unsigned int page_size = thp_size(bvec->bv_page); /* NOTE(review): sized from the bvec's first page, not the current bv->bv_page; see follow-up comment #1 */
+
+	if (iter_all->done) { /* part of this bvec was already returned */
+		bv->bv_page += thp_nr_pages(bv->bv_page); /* step past the page returned last time */
+		bv->bv_offset = 0;
+	} else { /* first call for this bvec */
+		BUG_ON(bvec->bv_offset >= page_size); /* NOTE(review): follow-up #1 suggests bvecs split by a bio split may not satisfy this - confirm */
+		bv->bv_page = bvec->bv_page;
+		bv->bv_offset = bvec->bv_offset & (page_size - 1);
+	}
+	bv->bv_len = min(page_size - bv->bv_offset, /* bytes left in this page, or ... */
+			 bvec->bv_len - iter_all->done); /* ... bytes left in the bvec, whichever is smaller */
+	iter_all->done += bv->bv_len; /* running count of bytes consumed from this bvec */
+
+	if (iter_all->done == bvec->bv_len) { /* bvec fully consumed */
+		iter_all->idx++; /* move on to the next bvec index */
+		iter_all->done = 0;
+	}
+}
 #endif /* __LINUX_BVEC_ITER_H */