diff mbox series

[v2,1/4] blk-mq: release crypto keyslot before reporting I/O complete

Message ID 20230308193645.114069-2-ebiggers@kernel.org (mailing list archive)
State Superseded
Headers show
Series Fix blk-crypto keyslot race condition | expand

Commit Message

Eric Biggers March 8, 2023, 7:36 p.m. UTC
From: Eric Biggers <ebiggers@google.com>

Once all I/O using a blk_crypto_key has completed, filesystems can call
blk_crypto_evict_key().  However, the block layer currently doesn't call
blk_crypto_put_keyslot() until the request is being freed, which happens
after upper layers have been told (via bio_endio()) the I/O has
completed.  This causes a race condition where blk_crypto_evict_key()
can see 'slot_refs != 0' without there being an actual bug.

This makes __blk_crypto_evict_key() hit the
'WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)' and return without
doing anything, eventually causing a use-after-free in
blk_crypto_reprogram_all_keys().  (This is a very rare bug and has only
been seen when per-file keys are being used with fscrypt.)

There are two options to fix this: either release the keyslot before
bio_endio() is called on the request's last bio, or make
__blk_crypto_evict_key() ignore slot_refs.  Let's go with the first
solution, since it preserves the ability to report bugs (via
WARN_ON_ONCE) where a key is evicted while still in use.

Fixes: a892c8d52c02 ("block: Inline encryption support for blk-mq")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 block/blk-crypto-internal.h | 25 +++++++++++++++++++++----
 block/blk-crypto.c          | 24 ++++++++++++------------
 block/blk-merge.c           |  2 ++
 block/blk-mq.c              | 15 ++++++++++++++-
 4 files changed, 49 insertions(+), 17 deletions(-)

Comments

Nathan Huckleberry March 13, 2023, 9:26 p.m. UTC | #1
On Wed, Mar 8, 2023 at 11:39 AM Eric Biggers <ebiggers@kernel.org> wrote:
>
> From: Eric Biggers <ebiggers@google.com>
>
> Once all I/O using a blk_crypto_key has completed, filesystems can call
> blk_crypto_evict_key().  However, the block layer currently doesn't call
> blk_crypto_put_keyslot() until the request is being freed, which happens
> after upper layers have been told (via bio_endio()) the I/O has
> completed.  This causes a race condition where blk_crypto_evict_key()
> can see 'slot_refs != 0' without there being an actual bug.
>
> This makes __blk_crypto_evict_key() hit the
> 'WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)' and return without
> doing anything, eventually causing a use-after-free in
> blk_crypto_reprogram_all_keys().  (This is a very rare bug and has only
> been seen when per-file keys are being used with fscrypt.)
>
> There are two options to fix this: either release the keyslot before
> bio_endio() is called on the request's last bio, or make
> __blk_crypto_evict_key() ignore slot_refs.  Let's go with the first
> solution, since it preserves the ability to report bugs (via
> WARN_ON_ONCE) where a key is evicted while still in use.
>
> Fixes: a892c8d52c02 ("block: Inline encryption support for blk-mq")
> Cc: stable@vger.kernel.org
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  block/blk-crypto-internal.h | 25 +++++++++++++++++++++----
>  block/blk-crypto.c          | 24 ++++++++++++------------
>  block/blk-merge.c           |  2 ++
>  block/blk-mq.c              | 15 ++++++++++++++-
>  4 files changed, 49 insertions(+), 17 deletions(-)
>
> diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
> index a8cdaf26851e..4f1de2495f0c 100644
> --- a/block/blk-crypto-internal.h
> +++ b/block/blk-crypto-internal.h
> @@ -65,6 +65,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
>         return rq->crypt_ctx;
>  }
>
> +static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
> +{
> +       return rq->crypt_keyslot;
> +}
> +
>  blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
>                                     const struct blk_crypto_key *key,
>                                     struct blk_crypto_keyslot **slot_ptr);
> @@ -119,6 +124,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
>         return false;
>  }
>
> +static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
> +{
> +       return false;
> +}
> +
>  #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
>
>  void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
> @@ -153,14 +163,21 @@ static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
>         return true;
>  }
>
> -blk_status_t __blk_crypto_init_request(struct request *rq);
> -static inline blk_status_t blk_crypto_init_request(struct request *rq)
> +blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq);
> +static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq)
>  {
>         if (blk_crypto_rq_is_encrypted(rq))
> -               return __blk_crypto_init_request(rq);
> +               return __blk_crypto_rq_get_keyslot(rq);
>         return BLK_STS_OK;
>  }
>
> +void __blk_crypto_rq_put_keyslot(struct request *rq);
> +static inline void blk_crypto_rq_put_keyslot(struct request *rq)
> +{
> +       if (blk_crypto_rq_has_keyslot(rq))
> +               __blk_crypto_rq_put_keyslot(rq);
> +}
> +
>  void __blk_crypto_free_request(struct request *rq);
>  static inline void blk_crypto_free_request(struct request *rq)
>  {
> @@ -199,7 +216,7 @@ static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
>  {
>
>         if (blk_crypto_rq_is_encrypted(rq))
> -               return blk_crypto_init_request(rq);
> +               return blk_crypto_rq_get_keyslot(rq);
>         return BLK_STS_OK;
>  }
>
> diff --git a/block/blk-crypto.c b/block/blk-crypto.c
> index 45378586151f..d0c7feb447e9 100644
> --- a/block/blk-crypto.c
> +++ b/block/blk-crypto.c
> @@ -224,27 +224,27 @@ static bool bio_crypt_check_alignment(struct bio *bio)
>         return true;
>  }
>
> -blk_status_t __blk_crypto_init_request(struct request *rq)
> +blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq)
>  {
>         return blk_crypto_get_keyslot(rq->q->crypto_profile,
>                                       rq->crypt_ctx->bc_key,
>                                       &rq->crypt_keyslot);
>  }
>
> -/**
> - * __blk_crypto_free_request - Uninitialize the crypto fields of a request.
> - *
> - * @rq: The request whose crypto fields to uninitialize.
> - *
> - * Completely uninitializes the crypto fields of a request. If a keyslot has
> - * been programmed into some inline encryption hardware, that keyslot is
> - * released. The rq->crypt_ctx is also freed.
> - */
> -void __blk_crypto_free_request(struct request *rq)
> +void __blk_crypto_rq_put_keyslot(struct request *rq)
>  {
>         blk_crypto_put_keyslot(rq->crypt_keyslot);
> +       rq->crypt_keyslot = NULL;
> +}
> +
> +void __blk_crypto_free_request(struct request *rq)
> +{
> +       /* The keyslot, if one was needed, should have been released earlier. */
> +       if (WARN_ON_ONCE(rq->crypt_keyslot))
> +               __blk_crypto_rq_put_keyslot(rq);
> +
>         mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
> -       blk_crypto_rq_set_defaults(rq);
> +       rq->crypt_ctx = NULL;
>  }
>
>  /**
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index 6460abdb2426..65e75efa9bd3 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -867,6 +867,8 @@ static struct request *attempt_merge(struct request_queue *q,
>         if (!blk_discard_mergable(req))
>                 elv_merge_requests(q, req, next);
>
> +       blk_crypto_rq_put_keyslot(next);
> +

This looks good to me, but it looks like there was a pre-existing bug
in the blk-merge code. The elv_merged_request function is only called
when the request does not merge. Does anyone know if that behavior is
correct?

>         /*
>          * 'next' is going away, so update stats accordingly
>          */
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index d0cb2ef18fe2..49825538d932 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -840,6 +840,12 @@ static void blk_complete_request(struct request *req)
>                 req->q->integrity.profile->complete_fn(req, total_bytes);
>  #endif
>
> +       /*
> +        * Upper layers may call blk_crypto_evict_key() anytime after the last
> +        * bio_endio().  Therefore, the keyslot must be released before that.
> +        */
> +       blk_crypto_rq_put_keyslot(req);
> +
>         blk_account_io_completion(req, total_bytes);
>
>         do {
> @@ -905,6 +911,13 @@ bool blk_update_request(struct request *req, blk_status_t error,
>                 req->q->integrity.profile->complete_fn(req, nr_bytes);
>  #endif
>
> +       /*
> +        * Upper layers may call blk_crypto_evict_key() anytime after the last
> +        * bio_endio().  Therefore, the keyslot must be released before that.
> +        */
> +       if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
> +               __blk_crypto_rq_put_keyslot(req);
> +
>         if (unlikely(error && !blk_rq_is_passthrough(req) &&
>                      !(req->rq_flags & RQF_QUIET)) &&
>                      !test_bit(GD_DEAD, &req->q->disk->state)) {
> @@ -2967,7 +2980,7 @@ void blk_mq_submit_bio(struct bio *bio)
>
>         blk_mq_bio_to_request(rq, bio, nr_segs);
>
> -       ret = blk_crypto_init_request(rq);
> +       ret = blk_crypto_rq_get_keyslot(rq);
>         if (ret != BLK_STS_OK) {
>                 bio->bi_status = ret;
>                 bio_endio(bio);
> --
> 2.39.2
>

This patch itself looks good to me.

Reviewed-by: Nathan Huckleberry <nhuck@google.com>
Eric Biggers March 14, 2023, 6:20 p.m. UTC | #2
On Mon, Mar 13, 2023 at 02:26:00PM -0700, Nathan Huckleberry wrote:
> > diff --git a/block/blk-merge.c b/block/blk-merge.c
> > index 6460abdb2426..65e75efa9bd3 100644
> > --- a/block/blk-merge.c
> > +++ b/block/blk-merge.c
> > @@ -867,6 +867,8 @@ static struct request *attempt_merge(struct request_queue *q,
> >         if (!blk_discard_mergable(req))
> >                 elv_merge_requests(q, req, next);
> >
> > +       blk_crypto_rq_put_keyslot(next);
> > +
> 
> This looks good to me, but it looks like there was a pre-existing bug
> in the blk-merge code. The elv_merged_request function is only called
> when the request does not merge. Does anyone know if that behavior is
> correct?

That's very confusing to me too!

I did notice that attempt_merge() calls elv_merge_requests() (not to be confused
with elv_merged_request()) if it merges the requests.

So it seems there is elv_merge_requests() which means the request was merged,
and elv_merged_request() which means the request was *not* merged...  I have no
idea what is going on there :-(
	
> This patch itself looks good to me.
> 
> Reviewed-by: Nathan Huckleberry <nhuck@google.com>

Thanks.

Jens, Christoph, etc., anyone else want to take a look too?

- Eric
Christoph Hellwig March 15, 2023, 4:18 p.m. UTC | #3
On Tue, Mar 14, 2023 at 11:20:14AM -0700, Eric Biggers wrote:
> I did notice that attempt_merge() calls elv_merge_requests() (not to be confused
> with elv_merged_request()) if it merges the requests.
> 
> So it seems there is elv_merge_requests() which means the request was merged,
> and elv_merged_request() which means the request was *not* merged...  I have no
> idea what is going on there :-(

The naming looks very confusing, but that is indeed what the code
does.  The elevator code is in massive need of a cleanup, as it is
often this confusing.
Christoph Hellwig March 15, 2023, 4:19 p.m. UTC | #4
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
index a8cdaf26851e..4f1de2495f0c 100644
--- a/block/blk-crypto-internal.h
+++ b/block/blk-crypto-internal.h
@@ -65,6 +65,11 @@  static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
 	return rq->crypt_ctx;
 }
 
+static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
+{
+	return rq->crypt_keyslot;
+}
+
 blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
 				    const struct blk_crypto_key *key,
 				    struct blk_crypto_keyslot **slot_ptr);
@@ -119,6 +124,11 @@  static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
 	return false;
 }
 
+static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
+{
+	return false;
+}
+
 #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
 
 void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
@@ -153,14 +163,21 @@  static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
 	return true;
 }
 
-blk_status_t __blk_crypto_init_request(struct request *rq);
-static inline blk_status_t blk_crypto_init_request(struct request *rq)
+blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq);
+static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq)
 {
 	if (blk_crypto_rq_is_encrypted(rq))
-		return __blk_crypto_init_request(rq);
+		return __blk_crypto_rq_get_keyslot(rq);
 	return BLK_STS_OK;
 }
 
+void __blk_crypto_rq_put_keyslot(struct request *rq);
+static inline void blk_crypto_rq_put_keyslot(struct request *rq)
+{
+	if (blk_crypto_rq_has_keyslot(rq))
+		__blk_crypto_rq_put_keyslot(rq);
+}
+
 void __blk_crypto_free_request(struct request *rq);
 static inline void blk_crypto_free_request(struct request *rq)
 {
@@ -199,7 +216,7 @@  static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
 {
 
 	if (blk_crypto_rq_is_encrypted(rq))
-		return blk_crypto_init_request(rq);
+		return blk_crypto_rq_get_keyslot(rq);
 	return BLK_STS_OK;
 }
 
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 45378586151f..d0c7feb447e9 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -224,27 +224,27 @@  static bool bio_crypt_check_alignment(struct bio *bio)
 	return true;
 }
 
-blk_status_t __blk_crypto_init_request(struct request *rq)
+blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq)
 {
 	return blk_crypto_get_keyslot(rq->q->crypto_profile,
 				      rq->crypt_ctx->bc_key,
 				      &rq->crypt_keyslot);
 }
 
-/**
- * __blk_crypto_free_request - Uninitialize the crypto fields of a request.
- *
- * @rq: The request whose crypto fields to uninitialize.
- *
- * Completely uninitializes the crypto fields of a request. If a keyslot has
- * been programmed into some inline encryption hardware, that keyslot is
- * released. The rq->crypt_ctx is also freed.
- */
-void __blk_crypto_free_request(struct request *rq)
+void __blk_crypto_rq_put_keyslot(struct request *rq)
 {
 	blk_crypto_put_keyslot(rq->crypt_keyslot);
+	rq->crypt_keyslot = NULL;
+}
+
+void __blk_crypto_free_request(struct request *rq)
+{
+	/* The keyslot, if one was needed, should have been released earlier. */
+	if (WARN_ON_ONCE(rq->crypt_keyslot))
+		__blk_crypto_rq_put_keyslot(rq);
+
 	mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
-	blk_crypto_rq_set_defaults(rq);
+	rq->crypt_ctx = NULL;
 }
 
 /**
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6460abdb2426..65e75efa9bd3 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -867,6 +867,8 @@  static struct request *attempt_merge(struct request_queue *q,
 	if (!blk_discard_mergable(req))
 		elv_merge_requests(q, req, next);
 
+	blk_crypto_rq_put_keyslot(next);
+
 	/*
 	 * 'next' is going away, so update stats accordingly
 	 */
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d0cb2ef18fe2..49825538d932 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -840,6 +840,12 @@  static void blk_complete_request(struct request *req)
 		req->q->integrity.profile->complete_fn(req, total_bytes);
 #endif
 
+	/*
+	 * Upper layers may call blk_crypto_evict_key() anytime after the last
+	 * bio_endio().  Therefore, the keyslot must be released before that.
+	 */
+	blk_crypto_rq_put_keyslot(req);
+
 	blk_account_io_completion(req, total_bytes);
 
 	do {
@@ -905,6 +911,13 @@  bool blk_update_request(struct request *req, blk_status_t error,
 		req->q->integrity.profile->complete_fn(req, nr_bytes);
 #endif
 
+	/*
+	 * Upper layers may call blk_crypto_evict_key() anytime after the last
+	 * bio_endio().  Therefore, the keyslot must be released before that.
+	 */
+	if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
+		__blk_crypto_rq_put_keyslot(req);
+
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
 		     !(req->rq_flags & RQF_QUIET)) &&
 		     !test_bit(GD_DEAD, &req->q->disk->state)) {
@@ -2967,7 +2980,7 @@  void blk_mq_submit_bio(struct bio *bio)
 
 	blk_mq_bio_to_request(rq, bio, nr_segs);
 
-	ret = blk_crypto_init_request(rq);
+	ret = blk_crypto_rq_get_keyslot(rq);
 	if (ret != BLK_STS_OK) {
 		bio->bi_status = ret;
 		bio_endio(bio);