diff mbox series

[4/5] block: don't free submitter owned integrity payload on I/O completion

Message ID 20240701050918.1244264-5-hch@lst.de (mailing list archive)
State New
Headers show
Series [1/5] block: split integrity support out of bio.h | expand

Commit Message

Christoph Hellwig July 1, 2024, 5:09 a.m. UTC
Currently __bio_integrity_endio frees the integrity payload unless it is
explicitly marked as user-mapped.  This means in-kernel callers that
allocate their own integrity payload never get to see it on I/O
completion.  The current two users don't need it as they just pre-mapped
PI tuples received over the network, but this limits uses of integrity
data lot.

Change bio_integrity_endio to call __bio_integrity_endio for block layer
generated integrity data only, and leave freeing of submitter
allocated integrity data to bio_uninit which also gets called from
the final bio_put.  This requires that unmapping user mapped or copied
integrity data is now always done by the caller, and the special
BIP_INTEGRITY_USER flag can go away.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/bio-integrity.c         | 57 ++++++++++++++---------------------
 block/blk.h                   | 13 ++++++--
 include/linux/bio-integrity.h |  3 +-
 3 files changed, 34 insertions(+), 39 deletions(-)

Comments

Kanchan Joshi July 1, 2024, 1 p.m. UTC | #1
On 7/1/2024 10:39 AM, Christoph Hellwig wrote:
> +/*
> + * Integrity payloads can either be owned by the submitter, in which case
> + * bio_uninit will free them, or owned and generated by the block layer,
> + * in which case we'll verify them here (for reads) and free them before
> + * the bio is handed back to the submitted.
> + */
> +bool __bio_integrity_endio(struct bio *bio);
>   static inline bool bio_integrity_endio(struct bio *bio)
>   {
> -	if (bio_integrity(bio))
> +	struct bio_integrity_payload *bip = bio_integrity(bio);
> +
> +	if (bip && (bip->bip_flags & BIP_BLOCK_INTEGRITY))
>   		return __bio_integrity_endio(bio);

The patch will cause regression for nvme-passthrough. For that 
completion order is:
(a) bio_endio()
(b) req->end_io
(c) blk_rq_unmap_user.

And current code ensures that integrity is freed explicitly only after 
(a) and (b).
With the patch, integrity will get freed during (a) itself.

There are two places in bio_endio() that can free the integrity.
It first calls bio_integrity_endio() - which is handled fine above.
But it also calls bio_uninit() - which will free the integrity. We don't 
want that to happen before passthrough gets the chance to unpin/copy-back.
Christoph Hellwig July 1, 2024, 3:17 p.m. UTC | #2
On Mon, Jul 01, 2024 at 06:30:34PM +0530, Kanchan Joshi wrote:
> The patch will cause regression for nvme-passthrough. For that 
> completion order is:
> (a) bio_endio()
> (b) req->end_io
> (c) blk_rq_unmap_user.
> 
> And current code ensures that integrity is freed explicitly only after 
> (a) and (b).
> With the patch, integrity will get freed during (a) itself.

It is supposed to be freed from (c), specifically from
blk_mq_map_bio_put.
> 
> There are two places in bio_endio() that can free the integrity.
> It first calls bio_integrity_endio() - which is handled fine above.
> But it also calls bio_uninit() - which will free the integrity. We don't 
> want that to happen before passthrough gets the chance to unpin/copy-back.

But yes, that messed it up.  I'm kinda curious why it didn't trip up
during my testing of the passthrough metadata code.  That bio_uninit
in bio_endio is quite bogus and I'm a bit suprised it hasn't caught
more errors - the reason why bio_uninit exists is specifically to deal
with those on-stack or embedded into bigger structure bios.
Anuj gupta July 2, 2024, 12:42 p.m. UTC | #3
On Mon, Jul 1, 2024 at 8:50 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Jul 01, 2024 at 06:30:34PM +0530, Kanchan Joshi wrote:
> > The patch will cause regression for nvme-passthrough. For that
> > completion order is:
> > (a) bio_endio()
> > (b) req->end_io
> > (c) blk_rq_unmap_user.
> >
> > And current code ensures that integrity is freed explicitly only after
> > (a) and (b).
> > With the patch, integrity will get freed during (a) itself.
>
> It is supposed to be freed from (c), specifically from
> blk_mq_map_bio_put.
> >
> > There are two places in bio_endio() that can free the integrity.
> > It first calls bio_integrity_endio() - which is handled fine above.
> > But it also calls bio_uninit() - which will free the integrity. We don't
> > want that to happen before passthrough gets the chance to unpin/copy-back.
>
> But yes, that messed it up.  I'm kinda curious why it didn't trip up
> during my testing of the passthrough metadata code.

Yeah, it didn't trigger any errors in my tests either. It's just that
integrity vecs
were not unpinned during completion.
diff mbox series

Patch

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index c8757d47e0ef62..4aa836d603fb23 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -22,9 +22,17 @@  void blk_flush_integrity(void)
 	flush_workqueue(kintegrityd_wq);
 }
 
-static void __bio_integrity_free(struct bio_set *bs,
-				 struct bio_integrity_payload *bip)
+/**
+ * bio_integrity_free - Free bio integrity payload
+ * @bio:	bio containing bip to be freed
+ *
+ * Description: Free the integrity portion of a bio.
+ */
+void bio_integrity_free(struct bio *bio)
 {
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct bio_set *bs = bio->bi_pool;
+
 	if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
 		if (bip->bip_vec)
 			bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
@@ -33,6 +41,8 @@  static void __bio_integrity_free(struct bio_set *bs,
 	} else {
 		kfree(bip);
 	}
+	bio->bi_integrity = NULL;
+	bio->bi_opf &= ~REQ_INTEGRITY;
 }
 
 /**
@@ -86,7 +96,10 @@  struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 
 	return bip;
 err:
-	__bio_integrity_free(bs, bip);
+	if (bs && mempool_initialized(&bs->bio_integrity_pool))
+		mempool_free(bip, &bs->bio_integrity_pool);
+	else
+		kfree(bip);
 	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
@@ -132,28 +145,6 @@  static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
 	bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt, dirty);
 }
 
-/**
- * bio_integrity_free - Free bio integrity payload
- * @bio:	bio containing bip to be freed
- *
- * Description: Used to free the integrity portion of a bio. Usually
- * called from bio_free().
- */
-void bio_integrity_free(struct bio *bio)
-{
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct bio_set *bs = bio->bi_pool;
-
-	if (bip->bip_flags & BIP_INTEGRITY_USER)
-		return;
-	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
-		kfree(bvec_virt(bip->bip_vec));
-
-	__bio_integrity_free(bs, bip);
-	bio->bi_integrity = NULL;
-	bio->bi_opf &= ~REQ_INTEGRITY;
-}
-
 /**
  * bio_integrity_unmap_free_user - Unmap and free bio user integrity payload
  * @bio:	bio containing bip to be unmapped and freed
@@ -165,14 +156,9 @@  void bio_integrity_free(struct bio *bio)
 void bio_integrity_unmap_free_user(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct bio_set *bs = bio->bi_pool;
 
-	if (WARN_ON_ONCE(!(bip->bip_flags & BIP_INTEGRITY_USER)))
-		return;
 	bio_integrity_unmap_user(bip);
-	__bio_integrity_free(bs, bip);
-	bio->bi_integrity = NULL;
-	bio->bi_opf &= ~REQ_INTEGRITY;
+	bio_integrity_free(bio);
 }
 
 /**
@@ -273,7 +259,7 @@  static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
 		goto free_bip;
 	}
 
-	bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
+	bip->bip_flags |= BIP_COPY_USER;
 	bip->bip_iter.bi_sector = seed;
 	bip->bip_vcnt = nr_vecs;
 	return 0;
@@ -294,7 +280,6 @@  static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
 		return PTR_ERR(bip);
 
 	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
-	bip->bip_flags |= BIP_INTEGRITY_USER;
 	bip->bip_iter.bi_sector = seed;
 	bip->bip_iter.bi_size = len;
 	bip->bip_vcnt = nr_vecs;
@@ -502,6 +487,8 @@  static void bio_integrity_verify_fn(struct work_struct *work)
 	struct bio *bio = bip->bip_bio;
 
 	blk_integrity_verify(bio);
+
+	kfree(bvec_virt(bip->bip_vec));
 	bio_integrity_free(bio);
 	bio_endio(bio);
 }
@@ -522,13 +509,13 @@  bool __bio_integrity_endio(struct bio *bio)
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 
-	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
-	    (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->csum_type) {
+	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
 		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
 		queue_work(kintegrityd_wq, &bip->bip_work);
 		return false;
 	}
 
+	kfree(bvec_virt(bip->bip_vec));
 	bio_integrity_free(bio);
 	return true;
 }
diff --git a/block/blk.h b/block/blk.h
index 401e604f35d2cf..2233dc8d36b82a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -202,11 +202,20 @@  static inline unsigned int blk_queue_get_max_sectors(struct request *rq)
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
-bool __bio_integrity_endio(struct bio *);
 void bio_integrity_free(struct bio *bio);
+
+/*
+ * Integrity payloads can either be owned by the submitter, in which case
+ * bio_uninit will free them, or owned and generated by the block layer,
+ * in which case we'll verify them here (for reads) and free them before
+ * the bio is handed back to the submitted.
+ */
+bool __bio_integrity_endio(struct bio *bio);
 static inline bool bio_integrity_endio(struct bio *bio)
 {
-	if (bio_integrity(bio))
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+
+	if (bip && (bip->bip_flags & BIP_BLOCK_INTEGRITY))
 		return __bio_integrity_endio(bio);
 	return true;
 }
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index cac24dac06fff0..3823d9be0d0790 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -10,8 +10,7 @@  enum bip_flags {
 	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
 	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
 	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
-	BIP_INTEGRITY_USER	= 1 << 5, /* Integrity payload is user address */
-	BIP_COPY_USER		= 1 << 6, /* Kernel bounce buffer in use */
+	BIP_COPY_USER		= 1 << 5, /* Kernel bounce buffer in use */
 };
 
 struct bio_integrity_payload {