@@ -828,6 +828,23 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
+/*
+ * Add the current segment of a BVEC iterator to @bio.
+ *
+ * @bio:  bio to append the segment to
+ * @iter: iterator whose ->bvec is the segment to add; the first
+ *        ->iov_offset bytes of that segment are skipped
+ *
+ * This helper only calls bio_add_page(); it does not take a page
+ * reference of its own.
+ *
+ * Returns 0 and advances @iter if the whole chunk was added. Returns
+ * -EINVAL if the iterator offset lies beyond the segment, or if
+ * bio_add_page() added less than was requested.
+ */
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
+{
+	const struct bio_vec *bv = iter->bvec;
+	unsigned int len;
+	size_t size;
+
+	/* An offset past the segment would make the subtraction below wrap. */
+	if (iter->iov_offset > bv->bv_len)
+		return -EINVAL;
+
+	/*
+	 * Only the bytes after iov_offset remain in this segment. Using the
+	 * full bv_len here would describe a range that runs past the end of
+	 * the bvec, because the page offset below already includes iov_offset.
+	 */
+	len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
+	size = bio_add_page(bio, bv->bv_page, len,
+				bv->bv_offset + iter->iov_offset);
+	if (size != len)
+		return -EINVAL;
+
+	iov_iter_advance(iter, size);
+	return 0;
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -876,23 +893,43 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
}
/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
- * @iter: iov iterator describing the region to be mapped
+ * @iter: iov iterator describing the region to be added
+ *
+ * This takes either an iterator pointing to user memory, or one pointing to
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
+ * map them into the kernel. On IO completion, the caller should put those
+ * pages. If we're adding kernel pages, we just have to add the pages to the
+ * bio directly. We don't grab an extra reference to those pages (the caller
+ * should already hold one), and we don't put the pages on IO completion.
+ * The caller needs to check if the bio is flagged BIO_HOLD_PAGES on IO
+ * completion. If it isn't, then pages should be released.
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
* The function tries, but does not guarantee, to pin as many pages as
- * fit into the bio, or are requested in *iter, whatever is smaller.
- * If MM encounters an error pinning the requested pages, it stops.
- * Error is returned only if 0 pages could be pinned.
+ * fit into the bio, or are requested in *iter, whatever is smaller. If
+ * MM encounters an error pinning the requested pages, it stops. Error
+ * is returned only if 0 pages could be pinned.
*/
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
+ const bool is_bvec = iov_iter_is_bvec(iter);
unsigned short orig_vcnt = bio->bi_vcnt;
+ /*
+ * If this is a BVEC iter, then the pages are kernel pages. Don't
+ * release them on IO completion.
+ */
+ if (is_bvec)
+ bio_set_flag(bio, BIO_HOLD_PAGES);
+
do {
- int ret = __bio_iov_iter_get_pages(bio, iter);
+ int ret;
+
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret))
return bio->bi_vcnt > orig_vcnt ? 0 : ret;
@@ -1634,7 +1671,8 @@ static void bio_dirty_fn(struct work_struct *work)
next = bio->bi_private;
bio_set_pages_dirty(bio);
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_HOLD_PAGES))
+ bio_release_pages(bio);
bio_put(bio);
}
}
@@ -1650,7 +1688,8 @@ void bio_check_pages_dirty(struct bio *bio)
goto defer;
}
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_HOLD_PAGES))
+ bio_release_pages(bio);
bio_put(bio);
return;
defer:
@@ -338,8 +338,9 @@ static void blkdev_bio_end_io(struct bio *bio)
struct bio_vec *bvec;
int i;
- bio_for_each_segment_all(bvec, bio, i)
- put_page(bvec->bv_page);
+ if (!bio_flagged(bio, BIO_HOLD_PAGES))
+ bio_for_each_segment_all(bvec, bio, i)
+ put_page(bvec->bv_page);
bio_put(bio);
}
}
@@ -1582,8 +1582,9 @@ static void iomap_dio_bio_end_io(struct bio *bio)
struct bio_vec *bvec;
int i;
- bio_for_each_segment_all(bvec, bio, i)
- put_page(bvec->bv_page);
+ if (!bio_flagged(bio, BIO_HOLD_PAGES))
+ bio_for_each_segment_all(bvec, bio, i)
+ put_page(bvec->bv_page);
bio_put(bio);
}
}
@@ -215,6 +215,7 @@ struct bio {
/*
* bio flags
*/
+#define BIO_HOLD_PAGES 0 /* don't put O_DIRECT pages */
#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
#define BIO_CLONED 2 /* doesn't own data */
#define BIO_BOUNCED 3 /* bio is a bounce bio */
For an ITER_BVEC, we can just iterate the iov and add the pages to the bio directly. This requires that the caller doesn't release the pages on IO completion; we add a BIO_HOLD_PAGES flag for that. The current two callers of bio_iov_iter_get_pages() are updated to check if they need to release pages on completion. This makes them work with bvecs that contain kernel mapped pages already. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- block/bio.c | 59 ++++++++++++++++++++++++++++++++------- fs/block_dev.c | 5 ++-- fs/iomap.c | 5 ++-- include/linux/blk_types.h | 1 + 4 files changed, 56 insertions(+), 14 deletions(-)