
[v2,02/14] block: bio-integrity: modify bio_integrity_add_page()

Message ID 20230510084854epcms2p756a3e1055399ead6bf539d3419c74c3e@epcms2p7 (mailing list archive)
State New, archived
Series Change the integrity configuration method in block

Commit Message

Jinyoung Choi May 10, 2023, 8:48 a.m. UTC
Within the constraints of the hardware, physically contiguous pages
should be merged into a single bio_vec. Previously,
bio_integrity_add_page() created a separate bio_vec for each page passed
in.

The page merge process for data and integrity payloads is almost the
same, so the merge helpers no longer take the bio itself as a parameter;
instead, the values referenced during the merge are passed directly.
Since that would have required more than seven parameters, the
page_merge_ctx structure was added to carry them.
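
For illustration, a minimal sketch of the contiguity test that decides
whether a new page can extend the previous bio_vec (the helper name is
hypothetical; the patch itself routes this through page_is_mergeable()
and bio_try_merge_hw_seg()):

	/*
	 * Sketch: a new (page, offset) can extend the previous bvec only
	 * if it starts exactly where the previous segment ends in
	 * physical memory.
	 */
	static bool pages_are_contiguous(const struct bio_vec *bv,
					 struct page *page, unsigned int offset)
	{
		phys_addr_t end = page_to_phys(bv->bv_page) + bv->bv_offset +
				  bv->bv_len;

		return end == page_to_phys(page) + offset;
	}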

Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>

Fixes: 783b94bd9250 ("nvme-pci: do not build a scatterlist to map metadata")
Signed-off-by: Jinyoung Choi <j-young.choi@samsung.com>
---
 block/bio-integrity.c | 66 ++++++++++++++++++++++++++++++++++---------
 block/bio.c           | 56 +++++++++++++++++++++++-------------
 block/blk.h           | 13 +++++++++
 3 files changed, 102 insertions(+), 33 deletions(-)

Comments

Christoph Hellwig May 12, 2023, 1:43 p.m. UTC | #1
Hi Jinyoung,

can you work a bit on the commit log and especially the subject line?

I'd word this as something like:

"Subject: bio-integrity: create multi-page bvecs in bio_integrity_add_page()

Allow bio_integrity_add_page to create multi-page bvecs, just like
the bio payloads.  This simplifies adding larger payloads, and fixes
support for non-tiny workloads with nvme, which stopped using scatterlist
for metadata a while ago"

It should probably also be mentioned somewhere that you did an audit to
ensure all drivers and the core code are fine with these multi-page
segments.  If that is not yet the case, this patch should only be added
once it is.

I think the extra arguments struct is a bit overcomplicated, and mostly
due to me making the existing code do weird things in the low-level
helpers.  With the "rationalize the flow in bio_add_page and friends"
series I just sent out, I think we can drop the previous patch and
simplify this one down to:

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4533eb49166109..85d70dc723f0ed 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -118,26 +118,44 @@ void bio_integrity_free(struct bio *bio)
  * @len:	number of bytes of integrity metadata in page
  * @offset:	start offset within page
  *
- * Description: Attach a page containing integrity metadata to bio.
+ * Add a page containing integrity metadata to a bio while respecting
+ * the hardware max_sectors, max_segment and gap limitations.
  */
 int bio_integrity_add_page(struct bio *bio, struct page *page,
 			   unsigned int len, unsigned int offset)
 {
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	if (bip->bip_vcnt >= bip->bip_max_vcnt) {
-		printk(KERN_ERR "%s: bip_vec full\n", __func__);
+	if (((bip->bip_iter.bi_size + len) >> SECTOR_SHIFT) >
+	    queue_max_hw_sectors(q))
 		return 0;
-	}
 
-	if (bip->bip_vcnt &&
-	    bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
-			     &bip->bip_vec[bip->bip_vcnt - 1], offset))
-		return 0;
+	if (bip->bip_vcnt > 0) {
+		struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
+		bool same_page = false;
+
+		if (bvec_try_merge_hw_page(q, bv, page, len, offset,
+				&same_page)) {
+			bip->bip_iter.bi_size += len;
+			return len;
+		}
+
+		if (bip->bip_vcnt >=
+		    min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
+			return 0;
+
+		/*
+		 * If the queue doesn't support SG gaps and adding this segment
+		 * would create a gap, disallow it.
+		 */
+		if (bvec_gap_to_prev(&q->limits, bv, offset))
+			return 0;
+	}
 
 	bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
 	bip->bip_vcnt++;
-
+	bip->bip_iter.bi_size += len;
 	return len;
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
@@ -249,7 +267,6 @@ bool bio_integrity_prep(struct bio *bio)
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
-	bip->bip_iter.bi_size = len;
 	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
 	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
diff --git a/block/bio.c b/block/bio.c
index 79e8aa600ddbe2..050b57e09ac362 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -934,7 +934,7 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
  * size limit.  This is not for normal read/write bios, but for passthrough
  * or Zone Append operations that we can't split.
  */
-static bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
+bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
 		struct page *page, unsigned len, unsigned offset,
 		bool *same_page)
 {
diff --git a/block/blk.h b/block/blk.h
index 45547bcf111938..1e67f738b52191 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -486,4 +486,8 @@ static inline int req_ref_read(struct request *req)
 	return atomic_read(&req->ref);
 }
 
+bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
+		struct page *page, unsigned len, unsigned offset,
+		bool *same_page);
+
 #endif /* BLK_INTERNAL_H */
Jinyoung Choi May 16, 2023, 12:54 p.m. UTC | #2
>Hi Jinyoung,
>
>can you work a bit on the commit log and especially the subject line?
>
>I'd word this as something like:
>
>"Subject: bio-integrity: create multi-page bvecs in bio_integrity_add_page()
>
>Allow bio_integrity_add_page to create multi-page bvecs, just like
>the bio payloads.  This simplifies adding larger payloads, and fixes
>support for non-tiny workloads with nvme, which stopped using scatterlist
>for metadata a while ago"
>

Hi Christoph, sorry for the late reply.

OK, I will reword it like that.

>It should probably also be mentioned somewhere that you did an audit to
>ensure all drivers and the core code are fine with these multi-page
>segments.  If that is not yet the case, this patch should only be added
>once it is.

Regardless of whether the bvecs hold a single page or multiple pages,
blk_rq_count_integrity_sg() and blk_rq_map_integrity_sg() walk the pages
contained in the bip to build the scatterlist segments, so there does
not seem to be a problem so far.
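
Roughly, the counting amounts to the sketch below (the helper name is
hypothetical, and the real blk_rq_count_integrity_sg() additionally
honors the queue's segment size and boundary limits):

	/* Sketch: count the hardware sg segments needed for the bip pages. */
	static unsigned int count_integrity_segments(struct bio *bio)
	{
		struct bio_vec iv, ivprv = { };
		struct bvec_iter iter;
		unsigned int segments = 0;
		bool have_prev = false;

		bio_for_each_integrity_vec(iv, bio, iter) {
			/* Physically contiguous with the previous vec? */
			if (!have_prev ||
			    page_to_phys(ivprv.bv_page) + ivprv.bv_offset +
			    ivprv.bv_len !=
			    page_to_phys(iv.bv_page) + iv.bv_offset)
				segments++;	/* start a new segment */
			ivprv = iv;
			have_prev = true;
		}

		return segments;
	}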

First, an NVMe device that supports FIPS was tested through the
functions above, and the change was also checked with DIX enabled in
scsi_debug. I am also trying to test a SAS device that supports SED.
A problem seems to occur when going through the RAID controller
(mpt3sas driver): it works normally in DIF mode, but a protection error
occurs when DIX mode is forcibly enabled. The same happens with the
original code, though. I will investigate further and be careful not to
introduce any problems.

>
>I think the extra arguments struct is a bit overcomplicated, and mostly
>due to me making the existing code do weird things in the low-level
>helpers.  With the "rationalize the flow in bio_add_page and friends"
>series I just sent out, I think we can drop the previous patch and
>simplify this one down to:

I tried not to diverge much from the existing logic, which is why I
added the struct even though it was not strictly necessary. I thought it
would be a problem to add conditions to the callers and have them call
the common code. Thank you for solving this part.
Once your series is merged, I will rework this patch on top of it and
proceed.

Best Regards,
Jinyoung.

Patch

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 4533eb491661..20444ec447cb 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -111,6 +111,23 @@  void bio_integrity_free(struct bio *bio)
 	bio->bi_opf &= ~REQ_INTEGRITY;
 }
 
+/**
+ * bip_full - check if the bip is full
+ * @bip:	bip to check
+ * @len:	length of one segment to be added
+ *
+ * Return true if @bip is full and one segment with @len bytes can't be
+ * added to the bip, otherwise return false
+ */
+static inline bool bip_full(struct bio_integrity_payload *bip, unsigned int len)
+{
+	if (bip->bip_vcnt >= bip->bip_max_vcnt)
+		return true;
+	if (bip->bip_iter.bi_size > UINT_MAX - len)
+		return true;
+	return false;
+}
+
 /**
  * bio_integrity_add_page - Attach integrity metadata
  * @bio:	bio to update
@@ -118,25 +135,53 @@  void bio_integrity_free(struct bio *bio)
  * @len:	number of bytes of integrity metadata in page
  * @offset:	start offset within page
  *
- * Description: Attach a page containing integrity metadata to bio.
+ * Add a page containing integrity metadata to a bio while respecting
+ * the hardware max_sectors, max_segment and gap limitations.
  */
 int bio_integrity_add_page(struct bio *bio, struct page *page,
 			   unsigned int len, unsigned int offset)
 {
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	if (bip->bip_vcnt >= bip->bip_max_vcnt) {
+	if (((bip->bip_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
+		return 0;
+
+	if (bip->bip_vcnt > 0) {
+		struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
+		bool same_page = false;
+		struct page_merge_ctx pmc = {
+			.bv = bv,
+			.bi_vcnt = bip->bip_vcnt,
+			.bi_iter = &bip->bip_iter,
+			.page = page,
+			.len = len,
+			.offset = offset,
+			.same_page = &same_page,
+		};
+
+		if (bio_try_merge_hw_seg(q, &pmc))
+			return len;
+
+		/*
+		 * If the queue doesn't support SG gaps and adding this segment
+		 * would create a gap, disallow it.
+		 */
+		if (bvec_gap_to_prev(&q->limits, bv, offset))
+			return 0;
+	}
+
+	if (bip_full(bip, len)) {
 		printk(KERN_ERR "%s: bip_vec full\n", __func__);
 		return 0;
 	}
 
-	if (bip->bip_vcnt &&
-	    bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
-			     &bip->bip_vec[bip->bip_vcnt - 1], offset))
+	if (bip->bip_vcnt >= queue_max_integrity_segments(q))
 		return 0;
 
 	bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
 	bip->bip_vcnt++;
+	bip->bip_iter.bi_size += len;
 
 	return len;
 }
@@ -249,7 +294,6 @@  bool bio_integrity_prep(struct bio *bio)
 	}
 
 	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
-	bip->bip_iter.bi_size = len;
 	bip_set_seed(bip, bio->bi_iter.bi_sector);
 
 	if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
@@ -258,7 +302,6 @@  bool bio_integrity_prep(struct bio *bio)
 	/* Map it */
 	offset = offset_in_page(buf);
 	for (i = 0 ; i < nr_pages ; i++) {
-		int ret;
 		bytes = PAGE_SIZE - offset;
 
 		if (len <= 0)
@@ -267,18 +310,13 @@  bool bio_integrity_prep(struct bio *bio)
 		if (bytes > len)
 			bytes = len;
 
-		ret = bio_integrity_add_page(bio, virt_to_page(buf),
-					     bytes, offset);
-
-		if (ret == 0) {
+		if (bio_integrity_add_page(bio, virt_to_page(buf),
+					   bytes, offset) < bytes) {
 			printk(KERN_ERR "could not attach integrity payload\n");
 			status = BLK_STS_RESOURCE;
 			goto err_end_io;
 		}
 
-		if (ret < bytes)
-			break;
-
 		buf += bytes;
 		len -= bytes;
 		offset = 0;
diff --git a/block/bio.c b/block/bio.c
index 1be17dea603a..45af9e39acff 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -926,22 +926,23 @@  static inline bool page_is_mergeable(const struct bio_vec *bv,
 	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
 }
 
-static bool __bio_try_merge_page(struct bio *bio, struct page *page,
-				 unsigned int len, unsigned int off,
-				 bool *same_page)
+static bool __bio_try_merge_page(struct page_merge_ctx *pmc)
 {
-	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+	struct bio_vec *bv = pmc->bv;
+	struct bvec_iter *bi_iter = pmc->bi_iter;
+	unsigned int len = pmc->len;
+	bool *same_page = pmc->same_page;
 
-	if (!page_is_mergeable(bv, page, len, off, same_page))
+	if (!page_is_mergeable(bv, pmc->page, len, pmc->offset, same_page))
 		return false;
 
-	if (bio->bi_iter.bi_size > UINT_MAX - len) {
+	if (bi_iter->bi_size > UINT_MAX - len) {
 		*same_page = false;
 		return false;
 	}
 
 	bv->bv_len += len;
-	bio->bi_iter.bi_size += len;
+	bi_iter->bi_size += len;
 
 	return true;
 }
@@ -966,13 +967,23 @@  static bool bio_try_merge_page(struct bio *bio, struct page *page,
 			       unsigned int len, unsigned int off,
 			       bool *same_page)
 {
+	struct page_merge_ctx pmc;
+
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return false;
 
 	if (!bio->bi_vcnt)
 		return false;
 
-	return __bio_try_merge_page(bio, page, len, off, same_page);
+	pmc.bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+	pmc.bi_vcnt = bio->bi_vcnt;
+	pmc.bi_iter = &bio->bi_iter;
+	pmc.page = page;
+	pmc.len = len;
+	pmc.offset = off;
+	pmc.same_page = same_page;
+
+	return __bio_try_merge_page(&pmc);
 }
 
 /*
@@ -980,20 +991,19 @@  static bool bio_try_merge_page(struct bio *bio, struct page *page,
  * size limit.  This is not for normal read/write bios, but for passthrough
  * or Zone Append operations that we can't split.
  */
-static bool bio_try_merge_hw_seg(struct request_queue *q, struct bio *bio,
-				 struct page *page, unsigned len,
-				 unsigned offset, bool *same_page)
+bool bio_try_merge_hw_seg(struct request_queue *q, struct page_merge_ctx *pmc)
 {
-	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 	unsigned long mask = queue_segment_boundary(q);
+	struct bio_vec *bv = pmc->bv;
+	unsigned int len = pmc->len;
 	phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
-	phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
+	phys_addr_t addr2 = page_to_phys(pmc->page) + pmc->offset + len - 1;
 
 	if ((addr1 | mask) != (addr2 | mask))
 		return false;
 	if (bv->bv_len + len > queue_max_segment_size(q))
 		return false;
-	return __bio_try_merge_page(bio, page, len, offset, same_page);
+	return __bio_try_merge_page(pmc);
 }
 
 /**
@@ -1013,8 +1023,6 @@  int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
 		unsigned int max_sectors, bool *same_page)
 {
-	struct bio_vec *bvec;
-
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return 0;
 
@@ -1022,15 +1030,25 @@  int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		return 0;
 
 	if (bio->bi_vcnt > 0) {
-		if (bio_try_merge_hw_seg(q, bio, page, len, offset, same_page))
+		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+		struct page_merge_ctx pmc = {
+			.bv = bv,
+			.bi_vcnt = bio->bi_vcnt,
+			.bi_iter = &bio->bi_iter,
+			.page = page,
+			.len = len,
+			.offset = offset,
+			.same_page = same_page,
+		};
+
+		if (bio_try_merge_hw_seg(q, &pmc))
 			return len;
 
 		/*
 		 * If the queue doesn't support SG gaps and adding this segment
 		 * would create a gap, disallow it.
 		 */
-		bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
-		if (bvec_gap_to_prev(&q->limits, bvec, offset))
+		if (bvec_gap_to_prev(&q->limits, bv, offset))
 			return 0;
 	}
 
diff --git a/block/blk.h b/block/blk.h
index 45547bcf1119..dd7cbb57ce43 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -118,6 +118,19 @@  static inline bool bvec_gap_to_prev(const struct queue_limits *lim,
 	return __bvec_gap_to_prev(lim, bprv, offset);
 }
 
+/* page merge context */
+struct page_merge_ctx {
+	struct bio_vec *bv;		/* bvec where @page will be merged */
+	unsigned short bi_vcnt;		/* how many bio_vec's */
+	struct bvec_iter *bi_iter;	/* actual i/o information on device */
+	struct page *page;		/* start page to add */
+	unsigned int len;		/* length of the data to add */
+	unsigned int offset;		/* offset of the data relative to @page */
+	bool *same_page;		/* return if the segment has been merged inside the same page */
+};
+
+bool bio_try_merge_hw_seg(struct request_queue *q, struct page_merge_ctx *pmc);
+
 static inline bool rq_mergeable(struct request *rq)
 {
 	if (blk_rq_is_passthrough(rq))