diff mbox series

[v5,1/3] block: bio-integrity: add PI iovec to bio

Message ID 20220920144618.1111138-2-a.buev@yadro.com (mailing list archive)
State New
Headers show
Series implement direct IO with integrity | expand

Commit Message

Alexander V. Buev Sept. 20, 2022, 2:46 p.m. UTC
Added functions to attach user PI iovec pages to bio and release this
pages via bio_integrity_free.

Signed-off-by: Alexander V. Buev <a.buev@yadro.com>
---
 block/bio-integrity.c | 163 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h   |   8 +++
 2 files changed, 171 insertions(+)

Comments

Christoph Hellwig Sept. 27, 2022, 7:47 a.m. UTC | #1
On Tue, Sep 20, 2022 at 05:46:16PM +0300, Alexander V. Buev wrote:
> Added functions to attach user PI iovec pages to bio and release this
> pages via bio_integrity_free.

I'd much prefer if for the first version we could just go down the
copy_from/to_user route as mentioned below and avoid the extra
complexity of the get_user_pages path.  Once we get the interface
right we can consider adding the get_user_pages version back if
we have benchmarks for relevant workloads that justify it.  In that
case we'll also need into refactoring thing so that more code is
shared with the bio data path.
diff mbox series

Patch

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 3f5685c00e36..bd6b74ae2c95 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -10,6 +10,7 @@ 
 #include <linux/mempool.h>
 #include <linux/export.h>
 #include <linux/bio.h>
+#include <linux/uio.h>
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include "blk.h"
@@ -91,6 +92,18 @@  struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
 
+void bio_integrity_release_pages(struct bio *bio)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct bio_vec *bv = bip->bip_vec;
+	unsigned short i;
+
+	for (i = 0; i < bip->bip_vcnt; i++) {
+		put_page(bv->bv_page);
+		bv++;
+	}
+}
+
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:	bio containing bip to be freed
@@ -105,6 +118,10 @@  void bio_integrity_free(struct bio *bio)
 
 	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
 		kfree(bvec_virt(bip->bip_vec));
+	else {
+		if (bip->bip_flags & BIP_RELEASE_PAGES)
+			bio_integrity_release_pages(bio);
+	}
 
 	__bio_integrity_free(bs, bip);
 	bio->bi_integrity = NULL;
@@ -378,6 +395,152 @@  void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
 	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
 }
 
+static inline
+struct page **__bio_integrity_temp_pages(struct bio *bio, unsigned int nr_needed_page)
+{
+	unsigned int nr_avail_page = 0;
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	if (bip->bip_max_vcnt > nr_needed_page) {
+		nr_avail_page = (bip->bip_max_vcnt - nr_needed_page) *
+			sizeof(struct bio_vec)/sizeof(struct page *);
+	}
+
+	if (nr_avail_page >= nr_needed_page)
+		return (struct page **) (bip->bip_vec + nr_needed_page);
+	else {
+		if (bio->bi_max_vecs - bio->bi_vcnt) {
+			nr_avail_page = (bio->bi_max_vecs - bio->bi_vcnt) *
+				sizeof(struct bio_vec)/sizeof(struct page *);
+			if (nr_avail_page >= nr_needed_page)
+				return (struct page **) (bio->bi_io_vec + bio->bi_vcnt);
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * bio_integrity_add_iovec - Add PI io vector
+ * @bio:	bio whose integrity vector to update
+ * @pi_iter:	iov_iter pointed to data added to @bio's integrity
+ *
+ * Description: Pins pages for *pi_iov and appends them to @bio's integrity.
+ */
+int bio_integrity_add_iovec(struct bio *bio, struct iov_iter *pi_iter)
+{
+	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+	struct bio_integrity_payload *bip;
+	struct page **pi_page = 0, **bio_page;
+	unsigned int nr_vec_page;
+	int ret;
+	ssize_t size;
+	size_t offset, pg_num, page_count;
+
+	if (unlikely(!(bi && bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE))) {
+		pr_err("Device %d:%d is not integrity capable",
+			MAJOR(bio->bi_bdev->bd_dev), MINOR(bio->bi_bdev->bd_dev));
+		return -EINVAL;
+	}
+
+	nr_vec_page = iov_iter_npages(pi_iter,
+		queue_max_integrity_segments(bdev_get_queue(bio->bi_bdev)));
+	bip = bio_integrity(bio);
+	if (bip) {
+		if (nr_vec_page > (bip->bip_max_vcnt - bip->bip_vcnt))
+			return -ENOMEM;
+	} else {
+		bip = bio_integrity_alloc(bio, GFP_NOIO, nr_vec_page);
+		if (IS_ERR(bip))
+			return PTR_ERR(bip);
+	}
+
+	/* get space for page pointers array */
+	bio_page = __bio_integrity_temp_pages(bio, nr_vec_page);
+
+	if (likely(bio_page))
+		pi_page = bio_page;
+	else {
+		pi_page = kcalloc(nr_vec_page,
+					sizeof(struct pi_page *), GFP_NOIO);
+		if (!pi_page) {
+			ret = -ENOMEM;
+			goto error;
+		}
+	}
+
+	bip->bip_iter.bi_size = pi_iter->count;
+	bip->bio_iter = bio->bi_iter;
+	bip_set_seed(bip, bio->bi_iter.bi_sector);
+
+	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
+		bip->bip_flags |= BIP_IP_CHECKSUM;
+
+	do {
+		size = iov_iter_get_pages2(pi_iter, pi_page, LONG_MAX,
+						nr_vec_page, &offset);
+		if (unlikely(size <= 0)) {
+			pr_err("Failed to pin integrity buffer for %d:%d\n",
+				MAJOR(bio->bi_bdev->bd_dev),
+				MINOR(bio->bi_bdev->bd_dev));
+			pr_err("Buffer size=%zu pages=%u err=%zi\n",
+				pi_iter->count, nr_vec_page, size);
+			ret = (size) ? size : -EFAULT;
+			goto error;
+		}
+
+		page_count = DIV_ROUND_UP(offset + size, PAGE_SIZE);
+
+		/* fill bio integrity biovecs the given pages */
+		for (pg_num = 0; pg_num < page_count; ++pg_num) {
+			size_t page_len;
+
+			page_len = min_t(size_t, PAGE_SIZE - offset, size);
+			ret = bio_integrity_add_page(bio, pi_page[pg_num],
+							page_len, offset);
+			if (unlikely(ret != page_len)) {
+				while ((1 + pg_num) > 0) {
+					put_page(pi_page[pg_num]);
+					pg_num--;
+				}
+				ret = -ENOMEM;
+				goto error;
+			}
+			size -= page_len;
+			offset = 0;
+			bip->bip_flags |= BIP_RELEASE_PAGES;
+		}
+
+		nr_vec_page -= page_count;
+
+	} while (pi_iter->count && nr_vec_page);
+
+
+	if (pi_iter->count) {
+		pr_err("Failed to pin whole integrity buffer for %d:%d\n",
+			MAJOR(bio->bi_bdev->bd_dev),
+			MINOR(bio->bi_bdev->bd_dev));
+		pr_err("Data of size=%zi not pined\n", pi_iter->count);
+		ret = -EFAULT;
+		goto error;
+	}
+
+	if (pi_page != bio_page)
+		kfree(pi_page);
+
+	return 0;
+
+error:
+	if (bio_integrity(bio))
+		bio_integrity_free(bio);
+
+	if (pi_page && pi_page != bio_page)
+		kfree(pi_page);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bio_integrity_add_iovec);
+
 /**
  * bio_integrity_trim - Trim integrity vector
  * @bio:	bio whose integrity vector to update
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ca22b06700a9..e7e328425c90 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -317,6 +317,7 @@  enum bip_flags {
 	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
 	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
 	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
+	BIP_RELEASE_PAGES	= 1 << 5, /* release pages after io completion */
 };
 
 /*
@@ -699,6 +700,7 @@  extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, un
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
 extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
+extern int bio_integrity_add_iovec(struct bio *bio, struct iov_iter *iter);
 extern void bio_integrity_trim(struct bio *);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
@@ -739,6 +741,12 @@  static inline void bio_integrity_advance(struct bio *bio,
 	return;
 }
 
+static inline int bio_integrity_add_iovec(struct bio *bio,
+					struct iov_iter *pi_iter)
+{
+	return 0;
+}
+
 static inline void bio_integrity_trim(struct bio *bio)
 {
 	return;