diff mbox series

[v6,2/9] block: Add encryption context to struct bio

Message ID 20191218145136.172774-3-satyat@google.com (mailing list archive)
State New, archived
Headers show
Series Inline Encryption Support | expand

Commit Message

Satya Tangirala Dec. 18, 2019, 2:51 p.m. UTC
We must have some way of letting a storage device driver know what
encryption context it should use for en/decrypting a request. However,
it's the filesystem/fscrypt that knows about and manages encryption
contexts. As such, when the filesystem layer submits a bio to the block
layer, and this bio eventually reaches a device driver with support for
inline encryption, the device driver will need to have been told the
encryption context for that bio.

We want to communicate the encryption context from the filesystem layer
to the storage device along with the bio, when the bio is submitted to the
block layer. To do this, we add a struct bio_crypt_ctx to struct bio, which
can represent an encryption context (note that we can't use the bi_private
field in struct bio to do this because that field does not function to pass
information across layers in the storage stack). We also introduce various
functions to manipulate the bio_crypt_ctx and make the bio/request merging
logic aware of the bio_crypt_ctx.

Signed-off-by: Satya Tangirala <satyat@google.com>
---
 block/Makefile                |   2 +-
 block/bio-crypt-ctx.c         | 131 ++++++++++++++++++++++++++++++
 block/bio.c                   |  16 ++--
 block/blk-core.c              |   3 +
 block/blk-merge.c             |  11 +++
 block/bounce.c                |  12 ++-
 drivers/md/dm.c               |   3 +-
 include/linux/bio-crypt-ctx.h | 146 +++++++++++++++++++++++++++++++++-
 include/linux/blk_types.h     |   6 ++
 9 files changed, 312 insertions(+), 18 deletions(-)
 create mode 100644 block/bio-crypt-ctx.c

Comments

Eric Biggers Dec. 18, 2019, 9:10 p.m. UTC | #1
On Wed, Dec 18, 2019 at 06:51:29AM -0800, Satya Tangirala wrote:
> We must have some way of letting a storage device driver know what
> encryption context it should use for en/decrypting a request. However,
> it's the filesystem/fscrypt that knows about and manages encryption
> contexts. As such, when the filesystem layer submits a bio to the block
> layer, and this bio eventually reaches a device driver with support for
> inline encryption, the device driver will need to have been told the
> encryption context for that bio.
> 
> We want to communicate the encryption context from the filesystem layer
> to the storage device along with the bio, when the bio is submitted to the
> block layer. To do this, we add a struct bio_crypt_ctx to struct bio, which
> can represent an encryption context (note that we can't use the bi_private
> field in struct bio to do this because that field does not function to pass
> information across layers in the storage stack). We also introduce various
> functions to manipulate the bio_crypt_ctx and make the bio/request merging
> logic aware of the bio_crypt_ctx.

You might want to reword this to clarify that this could potentially be used by
any user of the block layer (e.g., device-mapper targets), not just filesystems
and not just fscrypt.  fscrypt is just the initial use case.

> diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c
> new file mode 100644
> index 000000000000..dadf0da3c21b
> --- /dev/null
> +++ b/block/bio-crypt-ctx.c
> @@ -0,0 +1,131 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2019 Google LLC
> + */
> +
> +#include <linux/bio.h>
> +#include <linux/blkdev.h>
> +#include <linux/keyslot-manager.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +
> +
> +static int num_prealloc_crypt_ctxs = 128;
> +
> +module_param(num_prealloc_crypt_ctxs, int, 0444);
> +MODULE_PARM_DESC(num_prealloc_crypt_ctxs,
> +		"Number of bio crypto contexts to preallocate");
> +
> +static struct kmem_cache *bio_crypt_ctx_cache;
> +static mempool_t *bio_crypt_ctx_pool;
> +
> +int __init bio_crypt_ctx_init(void)
> +{
> +	bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0);
> +	if (!bio_crypt_ctx_cache)
> +		return -ENOMEM;
> +
> +	bio_crypt_ctx_pool = mempool_create_slab_pool(num_prealloc_crypt_ctxs,
> +						      bio_crypt_ctx_cache);
> +	if (!bio_crypt_ctx_pool)
> +		return -ENOMEM;
> +
> +	/* This is assumed in various places. */
> +	BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
> +
> +	return 0;
> +}
> +
> +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask)
> +{
> +	return mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
> +}
> +
> +void bio_crypt_free_ctx(struct bio *bio)
> +{
> +	mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool);
> +	bio->bi_crypt_context = NULL;
> +}
> +
> +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
> +{
> +	const struct bio_crypt_ctx *src_bc = src->bi_crypt_context;
> +
> +	/*
> +	 * If a bio is swhandled, then it will be decrypted when bio_endio
> +	 * is called. As we only want the data to be decrypted once, copies
> +	 * of the bio must not have have a crypt context.
> +	 */

This comment belongs in the patch that adds the crypto API fallback, not in this
patch.

> +/*
> + * Checks that two bio crypt contexts are compatible - i.e. that
> + * they are mergeable except for data_unit_num continuity.
> + */
> +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2)
> +{
> +	struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context;
> +	struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context;
> +
> +	if (bc1 != bc2)
> +		return false;
> +
> +	return !bc1 || bc1->bc_key == bc2->bc_key;
> +}

As we've been discussing, this got broken because it now always returns false
when the bio_crypt_ctx's are different.  My suggestion is:

	if (!bc1)
		return !bc2;
	return bc2 && bc1->bc_key == bc2->bc_key;

> @@ -615,15 +617,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
>  
>  	__bio_clone_fast(b, bio);
>  
> -	if (bio_integrity(bio)) {
> -		int ret;
> -
> -		ret = bio_integrity_clone(b, bio, gfp_mask);
> +	bio_crypt_clone(b, bio, gfp_mask);
>  
> -		if (ret < 0) {
> -			bio_put(b);
> -			return NULL;
> -		}
> +	if (bio_integrity(bio) &&
> +	    bio_integrity_clone(b, bio, gfp_mask) < 0) {
> +		bio_put(b);
> +		return NULL;
>  	}

Now that bio_crypt_clone() can't fail, this patch probably shouldn't bother to
clean up the blk-integrity error handling.

> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index d783bdc4559b..5e53aad97da9 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -596,6 +596,8 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
>  		req_set_nomerge(req->q, req);
>  		return 0;
>  	}
> +	if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), bio))
> +		return 0;
>  
>  	return ll_new_hw_segment(req, bio, nr_segs);
>  }
> @@ -612,6 +614,8 @@ int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs
>  		req_set_nomerge(req->q, req);
>  		return 0;
>  	}
> +	if (!bio_crypt_ctx_mergeable(bio, bio->bi_iter.bi_size, req->bio))
> +		return 0;
>  
>  	return ll_new_hw_segment(req, bio, nr_segs);
>  }
> @@ -656,6 +660,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
>  	if (blk_integrity_merge_rq(q, req, next) == false)
>  		return 0;
>  
> +	if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), next->bio))
> +		return 0;
> +
>  	/* Merge is OK... */
>  	req->nr_phys_segments = total_phys_segments;
>  	return 1;
> @@ -895,6 +902,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
>  	if (rq->ioprio != bio_prio(bio))
>  		return false;
>  
> +	/* Only merge if the crypt contexts are compatible */
> +	if (!bio_crypt_ctx_compatible(bio, rq->bio))
> +		return false;
> +
>  	return true;
>  }

Thanks, this looks much better now.  This fixes the bug in v5 where the DUN
continuity check in blk_try_merge() was insufficient, as blk_try_merge() isn't
always called, but ll_front_merge_fn() and ll_back_merge_fn() are.

As an optional cleanup, could you also move the blk-crypto checks in
ll_back_merge_fn(), ll_front_merge_fn(), and blk_rq_merge_ok() to be immediately
below the corresponding blk-integrity checks?  blk-integrity and blk-crypto are
both optional block layer features which have constraints for front and back
merges, so they require merge checks in all the same places (except in one case
where blk-integrity requires an extra check).  That's partly how I found the bug
(which for the record, Satya found independently too) -- I was comparing the
blk-crypto checks to blk-integrity.

So having the blk-crypto and blk-integrity checks be immediately next to each
other would be helpful from a readability standpoint.

Could you also flip the argument order in the call to bio_crypt_ctx_compatible()
so that it matches the order for the other checks in blk_rq_merge_ok()?
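
E.g. in blk_rq_merge_ok(), roughly (an illustrative sketch of the placement
and argument order only, not the exact patch):

	/* only merge integrity protected bio into ditto rq */
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;

	/* Only merge if the crypt contexts are compatible */
	if (!bio_crypt_ctx_compatible(rq->bio, bio))
		return false;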

None of this "actually matters", but IMO it's really important that we make
these merge checks as straightforward and auditable as possible, as bugs in them
cause data corruption, as we've seen.

>  
> diff --git a/block/bounce.c b/block/bounce.c
> index f8ed677a1bf7..aa57ccc6ced3 100644
> --- a/block/bounce.c
> +++ b/block/bounce.c
> @@ -267,14 +267,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
>  		break;
>  	}
>  
> -	if (bio_integrity(bio_src)) {
> -		int ret;
> +	bio_crypt_clone(bio, bio_src, gfp_mask);
>  
> -		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
> -		if (ret < 0) {
> -			bio_put(bio);
> -			return NULL;
> -		}
> +	if (bio_integrity(bio_src) &&
> +	    bio_integrity_clone(bio, bio_src, gfp_mask) < 0) {
> +		bio_put(bio);
> +		return NULL;
>  	}
>  
>  	bio_clone_blkg_association(bio, bio_src);

Like in bio_clone_fast(): now that bio_crypt_clone() can't fail, this patch
probably shouldn't bother cleaning up the blk-integrity error handling.

> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index e8f9661a10a1..783e0d5fd130 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1304,9 +1304,10 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
>  
>  	__bio_clone_fast(clone, bio);
>  
> +	bio_crypt_clone(clone, bio, GFP_NOIO);
> +
>  	if (bio_integrity(bio)) {
>  		int r;
> -

Unnecessary deleted line.

> diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h
> index dd4ac9d95428..4535df0a6349 100644
> --- a/include/linux/bio-crypt-ctx.h
> +++ b/include/linux/bio-crypt-ctx.h
> @@ -8,7 +8,7 @@
>  enum blk_crypto_mode_num {
>  	BLK_ENCRYPTION_MODE_INVALID,
>  	BLK_ENCRYPTION_MODE_AES_256_XTS,
> -	BLK_ENCRYPTION_MODE_AES_128_CBC,
> +	BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,

This should be folded into the patch which introduced blk_crypto_mode_num.

> +/**
> + * struct bio_crypt_ctx - an inline encryption context
> + * @bc_key: the key, algorithm, and data unit size to use
> + * @bc_keyslot: the keyslot that has been assigned for this key in @bc_ksm,
> + *		or -1 if no keyslot has been assigned yet.
> + * @bc_dun: the data unit number (starting IV) to use
> + * @bc_ksm: the keyslot manager into which the key has been programmed with
> + *	    @bc_keyslot, or NULL if this key hasn't yet been programmed.
> + *
> + * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for
> + * write requests) or decrypted (for read requests) inline by the storage device
> + * or controller, or by the crypto API fallback.
> + */
> +struct bio_crypt_ctx {
> +	const struct blk_crypto_key	*bc_key;
> +	int				bc_keyslot;
> +
> +	/* Data unit number */
> +	u64				bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
> +
> +	/*
> +	 * The keyslot manager where the key has been programmed
> +	 * with keyslot.
> +	 */
> +	struct keyslot_manager		*bc_ksm;
> +};

The two comments inside the struct definition are not needed now that there is a
kerneldoc comment above the struct which documents all the fields.

FWIW, I also think it would be slightly more logical to order the fields like:

struct bio_crypt_ctx {
	const struct blk_crypto_key	*bc_key;
	u64				bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
	struct keyslot_manager		*bc_ksm;
	int				bc_keyslot;
};

... because (key, dun) go together as they're what the user provides, while
(ksm, keyslot) go together as they're managed by the block layer.

> +static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc,
> +					       unsigned int bytes,
> +					u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
> +{
> +	int i = 0;
> +	unsigned int inc = bytes >> bc->bc_key->data_unit_size_bits;
> +
> +	while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) {
> +		if (bc->bc_dun[i] + inc != next_dun[i])
> +			return false;
> +		inc = ((bc->bc_dun[i] + inc)  < inc);
> +		i++;
> +	}
> +
> +	return true;
> +}

This incorrectly returns true in some cases.  It needs to compare the whole DUN,
not stop as soon as 'inc' becomes 0.

I'm also always a bit nervous of code that checks for integer wraparound
without casting or assigning the result, due to the presence of integer
promotion in C...  That's partly why I had the 'u64 sum' variable in the version
I suggested.  But this specific case is fine because the type is u64.
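
For reference, one way to write the loop so that the whole DUN gets compared
(a rough sketch only, untested):

	static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc,
						       unsigned int bytes,
					u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
	{
		int i;
		unsigned int carry = bytes >> bc->bc_key->data_unit_size_bits;

		/* Compare every word of the DUN, propagating the carry. */
		for (i = 0; i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) {
			u64 sum = bc->bc_dun[i] + carry;

			if (sum != next_dun[i])
				return false;
			carry = (sum < carry);
		}

		return true;
	}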

- Eric
Darrick J. Wong Dec. 18, 2019, 9:21 p.m. UTC | #2
On Wed, Dec 18, 2019 at 06:51:29AM -0800, Satya Tangirala wrote:
> We must have some way of letting a storage device driver know what
> encryption context it should use for en/decrypting a request. However,
> it's the filesystem/fscrypt that knows about and manages encryption
> contexts. As such, when the filesystem layer submits a bio to the block
> layer, and this bio eventually reaches a device driver with support for
> inline encryption, the device driver will need to have been told the
> encryption context for that bio.
> 
> We want to communicate the encryption context from the filesystem layer
> to the storage device along with the bio, when the bio is submitted to the
> block layer. To do this, we add a struct bio_crypt_ctx to struct bio, which
> can represent an encryption context (note that we can't use the bi_private
> field in struct bio to do this because that field does not function to pass
> information across layers in the storage stack). We also introduce various
> functions to manipulate the bio_crypt_ctx and make the bio/request merging
> logic aware of the bio_crypt_ctx.
> 
> Signed-off-by: Satya Tangirala <satyat@google.com>
> ---
>  block/Makefile                |   2 +-
>  block/bio-crypt-ctx.c         | 131 ++++++++++++++++++++++++++++++
>  block/bio.c                   |  16 ++--
>  block/blk-core.c              |   3 +
>  block/blk-merge.c             |  11 +++
>  block/bounce.c                |  12 ++-
>  drivers/md/dm.c               |   3 +-
>  include/linux/bio-crypt-ctx.h | 146 +++++++++++++++++++++++++++++++++-
>  include/linux/blk_types.h     |   6 ++
>  9 files changed, 312 insertions(+), 18 deletions(-)
>  create mode 100644 block/bio-crypt-ctx.c
> 
> diff --git a/block/Makefile b/block/Makefile
> index 7c603669f216..79f2b8b3fc5d 100644
> --- a/block/Makefile
> +++ b/block/Makefile
> @@ -37,4 +37,4 @@ obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
>  obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
>  obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
>  obj-$(CONFIG_BLK_PM)		+= blk-pm.o
> -obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= keyslot-manager.o
> \ No newline at end of file
> +obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= keyslot-manager.o bio-crypt-ctx.o
> \ No newline at end of file
> diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c
> new file mode 100644
> index 000000000000..dadf0da3c21b
> --- /dev/null
> +++ b/block/bio-crypt-ctx.c
> @@ -0,0 +1,131 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2019 Google LLC
> + */
> +
> +#include <linux/bio.h>
> +#include <linux/blkdev.h>
> +#include <linux/keyslot-manager.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +
> +
> +static int num_prealloc_crypt_ctxs = 128;
> +
> +module_param(num_prealloc_crypt_ctxs, int, 0444);
> +MODULE_PARM_DESC(num_prealloc_crypt_ctxs,
> +		"Number of bio crypto contexts to preallocate");
> +
> +static struct kmem_cache *bio_crypt_ctx_cache;
> +static mempool_t *bio_crypt_ctx_pool;
> +
> +int __init bio_crypt_ctx_init(void)
> +{
> +	bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0);
> +	if (!bio_crypt_ctx_cache)
> +		return -ENOMEM;
> +
> +	bio_crypt_ctx_pool = mempool_create_slab_pool(num_prealloc_crypt_ctxs,
> +						      bio_crypt_ctx_cache);
> +	if (!bio_crypt_ctx_pool)
> +		return -ENOMEM;
> +
> +	/* This is assumed in various places. */
> +	BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
> +
> +	return 0;
> +}
> +
> +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask)
> +{
> +	return mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
> +}
> +
> +void bio_crypt_free_ctx(struct bio *bio)
> +{
> +	mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool);
> +	bio->bi_crypt_context = NULL;
> +}
> +
> +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
> +{
> +	const struct bio_crypt_ctx *src_bc = src->bi_crypt_context;
> +
> +	/*
> +	 * If a bio is swhandled, then it will be decrypted when bio_endio
> +	 * is called. As we only want the data to be decrypted once, copies
> +	 * of the bio must not have have a crypt context.
> +	 */
> +	if (!src_bc)
> +		return;
> +
> +	dst->bi_crypt_context = bio_crypt_alloc_ctx(gfp_mask);
> +	*dst->bi_crypt_context = *src_bc;
> +
> +	if (src_bc->bc_keyslot >= 0)
> +		keyslot_manager_get_slot(src_bc->bc_ksm, src_bc->bc_keyslot);
> +}
> +EXPORT_SYMBOL_GPL(bio_crypt_clone);
> +
> +bool bio_crypt_should_process(struct request *rq)
> +{
> +	struct bio *bio = rq->bio;
> +
> +	if (!bio || !bio->bi_crypt_context)
> +		return false;
> +
> +	return rq->q->ksm == bio->bi_crypt_context->bc_ksm;
> +}
> +EXPORT_SYMBOL_GPL(bio_crypt_should_process);
> +
> +/*
> + * Checks that two bio crypt contexts are compatible - i.e. that
> + * they are mergeable except for data_unit_num continuity.
> + */
> +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2)
> +{
> +	struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context;
> +	struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context;
> +
> +	if (bc1 != bc2)
> +		return false;
> +
> +	return !bc1 || bc1->bc_key == bc2->bc_key;
> +}
> +
> +/*
> + * Checks that two bio crypt contexts are compatible, and also
> + * that their data_unit_nums are continuous (and can hence be merged)
> + * in the order b_1 followed by b_2.
> + */
> +bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes,
> +			     struct bio *b_2)
> +{
> +	struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context;
> +	struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context;
> +
> +	if (!bio_crypt_ctx_compatible(b_1, b_2))
> +		return false;
> +
> +	return !bc1 || bio_crypt_dun_is_contiguous(bc1, b1_bytes, bc2->bc_dun);
> +}
> +
> +void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc)
> +{
> +	keyslot_manager_put_slot(bc->bc_ksm, bc->bc_keyslot);
> +	bc->bc_ksm = NULL;
> +	bc->bc_keyslot = -1;
> +}
> +
> +int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc,
> +				  struct keyslot_manager *ksm)
> +{
> +	int slot = keyslot_manager_get_slot_for_key(ksm, bc->bc_key);
> +
> +	if (slot < 0)
> +		return slot;
> +
> +	bc->bc_keyslot = slot;
> +	bc->bc_ksm = ksm;
> +	return 0;
> +}
> diff --git a/block/bio.c b/block/bio.c
> index a5d75f6bf4c7..c99e054d56ef 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -236,6 +236,8 @@ void bio_uninit(struct bio *bio)
>  
>  	if (bio_integrity(bio))
>  		bio_integrity_free(bio);
> +
> +	bio_crypt_free_ctx(bio);
>  }
>  EXPORT_SYMBOL(bio_uninit);
>  
> @@ -615,15 +617,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
>  
>  	__bio_clone_fast(b, bio);
>  
> -	if (bio_integrity(bio)) {
> -		int ret;
> -
> -		ret = bio_integrity_clone(b, bio, gfp_mask);
> +	bio_crypt_clone(b, bio, gfp_mask);
>  
> -		if (ret < 0) {
> -			bio_put(b);
> -			return NULL;
> -		}
> +	if (bio_integrity(bio) &&
> +	    bio_integrity_clone(b, bio, gfp_mask) < 0) {
> +		bio_put(b);
> +		return NULL;
>  	}
>  
>  	return b;
> @@ -997,6 +996,7 @@ void bio_advance(struct bio *bio, unsigned bytes)
>  	if (bio_integrity(bio))
>  		bio_integrity_advance(bio, bytes);
>  
> +	bio_crypt_advance(bio, bytes);
>  	bio_advance_iter(bio, &bio->bi_iter, bytes);
>  }
>  EXPORT_SYMBOL(bio_advance);
> diff --git a/block/blk-core.c b/block/blk-core.c
> index e0a094fddee5..5200f4d1fed4 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1810,5 +1810,8 @@ int __init blk_dev_init(void)
>  	blk_debugfs_root = debugfs_create_dir("block", NULL);
>  #endif
>  
> +	if (bio_crypt_ctx_init() < 0)
> +		panic("Failed to allocate mem for bio crypt ctxs\n");
> +
>  	return 0;
>  }
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index d783bdc4559b..5e53aad97da9 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -596,6 +596,8 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
>  		req_set_nomerge(req->q, req);
>  		return 0;
>  	}
> +	if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), bio))
> +		return 0;
>  
>  	return ll_new_hw_segment(req, bio, nr_segs);
>  }
> @@ -612,6 +614,8 @@ int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs
>  		req_set_nomerge(req->q, req);
>  		return 0;
>  	}
> +	if (!bio_crypt_ctx_mergeable(bio, bio->bi_iter.bi_size, req->bio))
> +		return 0;
>  
>  	return ll_new_hw_segment(req, bio, nr_segs);
>  }
> @@ -656,6 +660,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
>  	if (blk_integrity_merge_rq(q, req, next) == false)
>  		return 0;
>  
> +	if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), next->bio))
> +		return 0;
> +
>  	/* Merge is OK... */
>  	req->nr_phys_segments = total_phys_segments;
>  	return 1;
> @@ -895,6 +902,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
>  	if (rq->ioprio != bio_prio(bio))
>  		return false;
>  
> +	/* Only merge if the crypt contexts are compatible */
> +	if (!bio_crypt_ctx_compatible(bio, rq->bio))
> +		return false;
> +
>  	return true;
>  }
>  
> diff --git a/block/bounce.c b/block/bounce.c
> index f8ed677a1bf7..aa57ccc6ced3 100644
> --- a/block/bounce.c
> +++ b/block/bounce.c
> @@ -267,14 +267,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
>  		break;
>  	}
>  
> -	if (bio_integrity(bio_src)) {
> -		int ret;
> +	bio_crypt_clone(bio, bio_src, gfp_mask);
>  
> -		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
> -		if (ret < 0) {
> -			bio_put(bio);
> -			return NULL;
> -		}
> +	if (bio_integrity(bio_src) &&
> +	    bio_integrity_clone(bio, bio_src, gfp_mask) < 0) {
> +		bio_put(bio);
> +		return NULL;
>  	}
>  
>  	bio_clone_blkg_association(bio, bio_src);
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index e8f9661a10a1..783e0d5fd130 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1304,9 +1304,10 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
>  
>  	__bio_clone_fast(clone, bio);
>  
> +	bio_crypt_clone(clone, bio, GFP_NOIO);
> +
>  	if (bio_integrity(bio)) {
>  		int r;
> -
>  		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
>  			     !dm_target_passes_integrity(tio->ti->type))) {
>  			DMWARN("%s: the target %s doesn't support integrity data.",
> diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h
> index dd4ac9d95428..4535df0a6349 100644
> --- a/include/linux/bio-crypt-ctx.h
> +++ b/include/linux/bio-crypt-ctx.h
> @@ -8,7 +8,7 @@
>  enum blk_crypto_mode_num {
>  	BLK_ENCRYPTION_MODE_INVALID,
>  	BLK_ENCRYPTION_MODE_AES_256_XTS,
> -	BLK_ENCRYPTION_MODE_AES_128_CBC,
> +	BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
>  	BLK_ENCRYPTION_MODE_ADIANTUM,
>  	BLK_ENCRYPTION_MODE_MAX,
>  };
> @@ -44,6 +44,150 @@ struct blk_crypto_key {
>  	u8 raw[BLK_CRYPTO_MAX_KEY_SIZE];
>  };
>  
> +#define BLK_CRYPTO_MAX_IV_SIZE		32
> +#define BLK_CRYPTO_DUN_ARRAY_SIZE	(BLK_CRYPTO_MAX_IV_SIZE/sizeof(u64))
> +
> +/**
> + * struct bio_crypt_ctx - an inline encryption context
> + * @bc_key: the key, algorithm, and data unit size to use
> + * @bc_keyslot: the keyslot that has been assigned for this key in @bc_ksm,
> + *		or -1 if no keyslot has been assigned yet.
> + * @bc_dun: the data unit number (starting IV) to use
> + * @bc_ksm: the keyslot manager into which the key has been programmed with
> + *	    @bc_keyslot, or NULL if this key hasn't yet been programmed.
> + *
> + * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for
> + * write requests) or decrypted (for read requests) inline by the storage device
> + * or controller, or by the crypto API fallback.
> + */
> +struct bio_crypt_ctx {
> +	const struct blk_crypto_key	*bc_key;
> +	int				bc_keyslot;
> +
> +	/* Data unit number */
> +	u64				bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
> +
> +	/*
> +	 * The keyslot manager where the key has been programmed
> +	 * with keyslot.
> +	 */
> +	struct keyslot_manager		*bc_ksm;
> +};
> +
> +int bio_crypt_ctx_init(void);
> +
> +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask);
> +
> +void bio_crypt_free_ctx(struct bio *bio);
> +
> +static inline bool bio_has_crypt_ctx(struct bio *bio)
> +{
> +	return bio->bi_crypt_context;
> +}
> +
> +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask);
> +
> +static inline void bio_crypt_set_ctx(struct bio *bio,
> +				     const struct blk_crypto_key *key,
> +				     u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
> +				     gfp_t gfp_mask)
> +{
> +	struct bio_crypt_ctx *bc = bio_crypt_alloc_ctx(gfp_mask);
> +
> +	bc->bc_key = key;
> +	memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun));
> +	bc->bc_ksm = NULL;
> +	bc->bc_keyslot = -1;
> +
> +	bio->bi_crypt_context = bc;
> +}
> +
> +void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc);
> +
> +int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc,
> +				  struct keyslot_manager *ksm);
> +
> +struct request;
> +bool bio_crypt_should_process(struct request *rq);
> +
> +static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc,
> +					       unsigned int bytes,
> +					u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
> +{
> +	int i = 0;
> +	unsigned int inc = bytes >> bc->bc_key->data_unit_size_bits;
> +
> +	while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) {
> +		if (bc->bc_dun[i] + inc != next_dun[i])
> +			return false;
> +		inc = ((bc->bc_dun[i] + inc)  < inc);
> +		i++;
> +	}
> +
> +	return true;
> +}
> +
> +
> +static inline void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
> +					   unsigned int inc)
> +{
> +	int i = 0;
> +
> +	while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) {
> +		dun[i] += inc;
> +		inc = (dun[i] < inc);
> +		i++;
> +	}
> +}
> +
> +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes)
> +{
> +	struct bio_crypt_ctx *bc = bio->bi_crypt_context;
> +
> +	if (!bc)
> +		return;
> +
> +	bio_crypt_dun_increment(bc->bc_dun,
> +				bytes >> bc->bc_key->data_unit_size_bits);
> +}
> +
> +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2);
> +
> +bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes,
> +			     struct bio *b_2);
> +
> +#else /* CONFIG_BLK_INLINE_ENCRYPTION */
> +static inline int bio_crypt_ctx_init(void)
> +{
> +	return 0;
> +}
> +
> +static inline bool bio_has_crypt_ctx(struct bio *bio)
> +{
> +	return false;
> +}
> +
> +static inline void bio_crypt_clone(struct bio *dst, struct bio *src,
> +				   gfp_t gfp_mask) { }
> +
> +static inline void bio_crypt_free_ctx(struct bio *bio) { }
> +
> +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) { }
> +
> +static inline bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2)
> +{
> +	return true;
> +}
> +
> +static inline bool bio_crypt_ctx_mergeable(struct bio *b_1,
> +					   unsigned int b1_bytes,
> +					   struct bio *b_2)
> +{
> +	return true;
> +}
> +
>  #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
> +
>  #endif /* CONFIG_BLOCK */
> +
>  #endif /* __LINUX_BIO_CRYPT_CTX_H */
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 70254ae11769..1996689c51d3 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -18,6 +18,7 @@ struct block_device;
>  struct io_context;
>  struct cgroup_subsys_state;
>  typedef void (bio_end_io_t) (struct bio *);
> +struct bio_crypt_ctx;
>  
>  /*
>   * Block error status values.  See block/blk-core:blk_errors for the details.
> @@ -173,6 +174,11 @@ struct bio {
>  	u64			bi_iocost_cost;
>  #endif
>  #endif
> +
> +#ifdef CONFIG_BLK_INLINE_ENCRYPTION
> +	struct bio_crypt_ctx	*bi_crypt_context;
> +#endif

This grows struct bio even if we aren't actively using bi_crypt_context,
and I thought Jens told us to stop making it bigger. :)

--D

> +
>  	union {
>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>  		struct bio_integrity_payload *bi_integrity; /* data integrity */
> -- 
> 2.24.1.735.g03f4e72817-goog
>
Martin K. Petersen Dec. 18, 2019, 9:25 p.m. UTC | #3
Darrick,

>> +#ifdef CONFIG_BLK_INLINE_ENCRYPTION
>> +	struct bio_crypt_ctx	*bi_crypt_context;
>> +#endif
>
> This grows struct bio even if we aren't actively using bi_crypt_context,
> and I thought Jens told us to stop making it bigger. :)

Yeah. Why not use the bio integrity plumbing? It was explicitly designed
to attach things to a bio and have them consumed by the device driver.
Eric Biggers Dec. 18, 2019, 10:27 p.m. UTC | #4
On Wed, Dec 18, 2019 at 04:25:28PM -0500, Martin K. Petersen wrote:
> 
> Darrick,
> 
> >> +#ifdef CONFIG_BLK_INLINE_ENCRYPTION
> >> +	struct bio_crypt_ctx	*bi_crypt_context;
> >> +#endif
> >
> > This grows struct bio even if we aren't actively using bi_crypt_context,
> > and I thought Jens told us to stop making it bigger. :)
> 
> Yeah. Why not use the bio integrity plumbing? It was explicitly designed
> to attach things to a bio and have them consumed by the device driver.
> 

There's not really any such thing as "use the bio integrity plumbing".
blk-integrity just does blk-integrity; it's not a plumbing layer that allows
other features to be supported.  Well, in theory we could refactor and rename
all the hooks to "blk-extra" and make them delegate to either blk-integrity or
blk-crypto, but I think that would be overkill.

What we could do, though, is say that at most one of blk-crypto and
blk-integrity can be used at once on a given bio, and put the bi_integrity and
bi_crypt_context pointers in union.  (That would require allocating a
REQ_INLINECRYPT bit so that we can tell what the pointer points to.)
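
I.e. something like this in struct bio (a rough sketch; REQ_INLINECRYPT would
be a new req flag that doesn't exist yet):

	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
		struct bio_crypt_ctx *bi_crypt_context; /* valid iff REQ_INLINECRYPT */
#endif
	};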

- Eric
Martin K. Petersen Dec. 19, 2019, 12:47 a.m. UTC | #5
Eric,

> There's not really any such thing as "use the bio integrity plumbing".
> blk-integrity just does blk-integrity; it's not a plumbing layer that
> allows other features to be supported.  Well, in theory we could
> refactor and rename all the hooks to "blk-extra" and make them
> delegate to either blk-integrity or blk-crypto, but I think that would
> be overkill.

I certainly don't expect your crypto stuff to plug in without any
modification to what we currently have. I'm just observing that the
existing plumbing is designed to have pluggable functions that let
filesystems attach additional information to bios on writes and process
additional attached information on reads. And the block layer already
handles slicing and dicing these attachments as the I/O traverses the
stack.

There's also other stuff that probably won't be directly applicable or
interesting for your use case. It just seems like identifying actual
commonalities and differences would be worthwhile.

Note that substantial changes to the integrity code would inevitably
lead to a lot of pain and suffering for me. So from that perspective I
am very happy if you leave it alone. From an architectural viewpoint,
however, it seems that there are more similarities than differences
between crypto and integrity. And we should avoid duplication where
possible. That's all.

> What we could do, though, is say that at most one of blk-crypto and
> blk-integrity can be used at once on a given bio, and put the
> bi_integrity and bi_crypt_context pointers in union.  (That would
> require allocating a REQ_INLINECRYPT bit so that we can tell what the
> pointer points to.)

Absolutely. That's why it's a union. Putting your stuff there is a
prerequisite as far as I'm concerned. No need to grow the bio when the
two features are unlikely to coexist. We can revisit that later should
the need arise.
Eric Biggers Dec. 20, 2019, 3:52 a.m. UTC | #6
On Wed, Dec 18, 2019 at 07:47:56PM -0500, Martin K. Petersen wrote:
> 
> Eric,
> 
> > There's not really any such thing as "use the bio integrity plumbing".
> > blk-integrity just does blk-integrity; it's not a plumbing layer that
> > allows other features to be supported.  Well, in theory we could
> > refactor and rename all the hooks to "blk-extra" and make them
> > delegate to either blk-integrity or blk-crypto, but I think that would
> > be overkill.
> 
> I certainly don't expect your crypto stuff to plug in without any
> modification to what we currently have. I'm just observing that the
> existing plumbing is designed to have pluggable functions that let
> filesystems attach additional information to bios on writes and process
> additional attached information on reads. And the block layer already
> handles slicing and dicing these attachments as the I/O traverses the
> stack.
> 
> There's also other stuff that probably won't be directly applicable or
> interesting for your use case. It just seems like identifying actual
> commonalities and differences would be worthwhile.
> 
> Note that substantial changes to the integrity code would inevitably
> lead to a lot of pain and suffering for me. So from that perspective I
> am very happy if you leave it alone. From an architectural viewpoint,
> however, it seems that there are more similarities than differences
> between crypto and integrity. And we should avoid duplication where
> possible. That's all.

There are some similarities, like both being optional features that need extra
per-bio information and hooks for bio merging, freeing, cloning, and advancing.

However, the nature of the per-bio information is very different.  Most of the
complexity in blk-integrity is around managing of a separate integrity
scatterlist for each bio, alongside the regular data scatterlist.

That's not something we need or want for inline encryption.  For each bio we
just need a key, algorithm, data unit number, and data unit size.  Since the
data unit number (IV) is automatically incremented for each sector and the
encryption is length-preserving, there's no per-sector data.

(Granted, from a crypto perspective ideally one would use authenticated
encryption, which does require per-sector data.  However, no one seems
interested in building hardware that supports it.  So for the foreseeable future,
only length-preserving encryption is in scope for this.)

Also, blk-crypto actually transforms the data whereas blk-integrity does not.

> > What we could do, though, is say that at most one of blk-crypto and
> > blk-integrity can be used at once on a given bio, and put the
> > bi_integrity and bi_crypt_context pointers in union.  (That would
> > require allocating a REQ_INLINECRYPT bit so that we can tell what the
> > pointer points to.)
> 
> Absolutely. That's why it's a union. Putting your stuff there is a
> prerequisite as far as I'm concerned. No need to grow the bio when the
> two features are unlikely to coexist. We can revisit that later should
> the need arise.

There are some ways the two features could be supported simultaneously without
using more space, like making the pointer point to a linked list of tagged
structs, or making the struct contain both a bio_crypt_ctx and
bio_integrity_payload (or whichever combination is enabled in kconfig).

But it would be painful and I don't think people need this for now.  So if
people really aren't willing to accept the extra 8 bytes per bio even behind a
kconfig option, my vote is we that we put bi_crypt_context in the union with
bi_integrity, and add a flag REQ_INLINECRYPT (like REQ_INTEGRITY) that indicates
that the bi_crypt_context member of the union is valid.

We'd also need some error-handling to prevent the two features from actually
being used together.  It looks like there are several cases to consider.  One of
them is what happens if bio_crypt_set_ctx() is called when blk-integrity
verification or generation is enabled for the disk.  I suppose it could either
return an error, or we could make blk-crypto use the crypto API fallback
provided that it was modified to make the decryption stop relying on
->bi_crypt_context, which could be done by cloning the bio and using
->bi_private instead.

- Eric
Martin K. Petersen Jan. 7, 2020, 4:35 a.m. UTC | #7
Eric,

> However, the nature of the per-bio information is very different.
> Most of the complexity in blk-integrity is around managing of a
> separate integrity scatterlist for each bio, alongside the regular
> data scatterlist.

> That's not something we need or want for inline encryption.  For each
> bio we just need a key, algorithm, data unit number, and data unit
> size.  Since the data unit number (IV) is automatically incremented
> for each sector and the encryption is length-preserving, there's no
> per-sector data.

Fair enough. I just wanted to make sure that you guys had actually
looked at the integrity stuff and determined it wasn't a good fit.

> There are some ways the two features could be supported simultaneously
> without using more space, like making the pointer point to a linked
> list of tagged structs, or making the struct contain both a
> bio_crypt_ctx and bio_integrity_payload (or whichever combination is
> enabled in kconfig).

We have previously discussed having a facility in which you could chain
several different things (with different prep/endio functions) off a
bio. Similar to how we allow arbitrary stacking of block_devices. That
was actually my main interest in terms of opening the integrity can of
worms in this thread. Trying to find out which pieces of the plumbing,
if any, could potentially be made generic and feature-independent.

The copy offload efforts, which are now again picking up momentum, also
need to hang things off of the bio. That's the original reason the
integrity field became a union, fwiw.

> So if people really aren't willing to accept the extra 8 bytes per bio
> even behind a kconfig option, my vote is we that we put
> bi_crypt_context in the union with bi_integrity, and add a flag
> REQ_INLINECRYPT (like REQ_INTEGRITY) that indicates that the
> bi_crypt_context member of the union is valid.

Agreed.

> We'd also need some error-handling to prevent the two features from
> actually being used together.  It looks like there are several cases
> to consider.  One of them is what happens if bio_crypt_set_ctx() is
> called when blk-integrity verification or generation is enabled for
> the disk.

The integrity profile is only attached if the device driver identifies a
discovered device as capable. At least in the short term no device
should indicate simultaneous support for DIX and your crypto interface.

Not saying that sanity checks shouldn't exist. But I think both of these
features fall into things that are registered at device discovery time
so we shouldn't need to clutter the I/O hot path with mutual exclusivity
checks.
Christoph Hellwig Jan. 8, 2020, 2:07 p.m. UTC | #8
On Wed, Dec 18, 2019 at 07:47:56PM -0500, Martin K. Petersen wrote:
> Absolutely. That's why it's a union. Putting your stuff there is a
> prerequisite as far as I'm concerned. No need to grow the bio when the
> two features are unlikely to coexist. We can revisit that later should
> the need arise.

With NVMe key per I/O support some form of inline encryption and PI are
very likely to be used together in the not too far future.
Eric Biggers Jan. 8, 2020, 5:26 p.m. UTC | #9
On Wed, Jan 08, 2020 at 06:07:30AM -0800, Christoph Hellwig wrote:
> On Wed, Dec 18, 2019 at 07:47:56PM -0500, Martin K. Petersen wrote:
> > Absolutely. That's why it's a union. Putting your stuff there is a
> > prerequisite as far as I'm concerned. No need to grow the bio when the
> > two features are unlikely to coexist. We can revisit that later should
> > the need arise.
> 
> With NVMe key per I/O support some form of inline encryption and PI are
> very likely to be used together in the not too far future.

The NVMe "key per I/O" draft is heavily flawed, and I don't think it will be
useful at all in the Linux kernel context.  The problem is that, as far as I can
tell, it doesn't allow the encryption algorithm and IVs to be selected, or even
standardized or made discoverable in any way.  It does say that AES-256 must be
supported, but it doesn't say which mode of operation (i.e. it could be
something inappropriate for disk encryption, like ECB), nor does it say whether
AES-256 has to be the default or not, and if it's not the default how to
discover that and select AES-256.  IV generation is also unspecified, so it
could be something insecure like always using the same IV.

So effectively the NVMe encryption will be unspecified, untestable, and
unverifiable.  That means that vendors are likely to implement it insecurely,
similar to how they're implementing self-encrypting drives insecurely [1].
(Granted, there are some reasons to think that vendors are less likely to screw
up key per I/O.  But inevitably some will still get it wrong.)

[1] https://www.ieee-security.org/TC/SP2019/papers/310.pdf

Also, since "key per I/O" won't allow selecting IVs, all the encrypted data will
be tied to its physical location on-disk.  That will make "key per I/O" unusable
in any case where encrypted blocks are moved without the key, e.g.
filesystem-level encryption on many filesystems.

And since the way that dm-crypt and fscrypt work is that you select which
algorithm and IV generator you want to use, to even use NVMe "key per I/O" with
them we'd have to add magic settings that say to use some unspecified
hardware-specific encryption format, which could be completely insecure.  As one
of the fscrypt maintainers I'd be really hesitant to accept any such patch, and
I think the dm-crypt people would feel the same way.

I've already raised these concerns in the NVMe and TCG Storage working groups,
and the people working on it refused to make any changes, as they consider "key
per I/O" to be more akin to the TCG Opal self-encrypting drive specification,
and not actually intended to be "inline encryption".

So let's not over-engineer this kernel patchset to support some broken
vaporware, please.

- Eric
Martin K. Petersen Jan. 9, 2020, 3:40 a.m. UTC | #10
Christoph,

>> Absolutely. That's why it's a union. Putting your stuff there is a
>> prerequisite as far as I'm concerned. No need to grow the bio when the
>> two features are unlikely to coexist. We can revisit that later should
>> the need arise.
>
> With NVMe key per I/O support some form of inline encryption and PI are
> very likely to be used together in the not too far future.

I don't disagree that we'll have to manage coexistence eventually. Hence
my comments about being able to chain multiple things to a bio.

In the immediate term, though, I think it makes sense to leverage the
integrity pointer to avoid growing struct bio.
Eric Biggers Jan. 14, 2020, 9:24 p.m. UTC | #11
On Wed, Dec 18, 2019 at 06:51:29AM -0800, Satya Tangirala wrote:
> +static inline void bio_crypt_set_ctx(struct bio *bio,
> +				     const struct blk_crypto_key *key,
> +				     u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
> +				     gfp_t gfp_mask)
> +{
> +	struct bio_crypt_ctx *bc = bio_crypt_alloc_ctx(gfp_mask);
> +
> +	bc->bc_key = key;
> +	memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun));
> +	bc->bc_ksm = NULL;
> +	bc->bc_keyslot = -1;
> +
> +	bio->bi_crypt_context = bc;
> +}

The 'dun' argument should be const.
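
I.e. (sketch of the adjusted prototype):

	static inline void bio_crypt_set_ctx(struct bio *bio,
					     const struct blk_crypto_key *key,
					     const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
					     gfp_t gfp_mask)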

- Eric
Christoph Hellwig Jan. 17, 2020, 8:32 a.m. UTC | #12
Hi Eric,

On Wed, Jan 08, 2020 at 09:26:29AM -0800, Eric Biggers wrote:
> The NVMe "key per I/O" draft is heavily flawed, and I don't think it will be
> useful at all in the Linux kernel context.  The problem is that, as far as I
> can tell, it doesn't allow the encryption algorithm and IVs to be selected,
> or even standardized or made discoverable in any way.  It does say that
> AES-256 must be supported, but it doesn't say which mode of operation (i.e.
> it could be something inappropriate for disk encryption, like ECB), nor
> does it say whether AES-256 has to be the default or not, and if it's not
> the default how to discover that and select AES-256.

I've talked to people involved with the TCG side of this spec, where
all the interesting crypto happens.  Currently the plan is to support
KMIP wrapper keys, which specify the exact algorithm and operation
mode, and algorithms and modes for the initial version are planned to
be AES 256/512 XTS.  I also had a chat with an involved person and
they understand the principle that for the inline crypto to be trusted
it needs to be interoperable with (trusted) software algorithms.  So
I don't think it is all doom.

> IV generation is also unspecified, so it
> could be something insecure like always using the same IV.

From talking to one of the initiators of the spec, no it is not intended
to be unspecified, but indeed tied to the LBA (see below).

> Also, since "key per I/O" won't allow selecting IVs, all the encrypted data will
> be tied to its physical location on-disk.  That will make "key per I/O" unusable
> in any case where encrypted blocks are moved without the key, e.g.
> filesystem-level encryption on many filesystems.

File systems don't move data around all that often (saying that with my
fs developer hat on).  In traditional file systems only defragmentation
will move data around, with extent refcounting it can also happen for
dedup, and for file systems that write out of place data gets moved
when parts of a block are rewritten, but in that case a read modify
write cycle is performed in the Linux code anyway, so it will go through
the inline encryption engine on the way in and the way out.

So in other words - specifying an IV would be useful for some use cases,
but I don't think it is a deal blocker. Even without that it is useful
for block device level encryption, and could have some usefulness for
file system encryption usage.

I think that adding an IV would eventually be useful, but fitting that
into NVMe won't be easy, as you'd need to find a way to specify the IV
for each submission queue entry, which requires growing it, or finding
some way to extend it out of band.

> I've already raised these concerns in the NVMe and TCG Storage working groups,
> and the people working on it refused to make any changes, as they consider "key
> per I/O" to be more akin to the TCG Opal self-encrypting drive specification,
> and not actually intended to be "inline encryption".

While I have my fair share of issues how the spec is developed that
isn't my impression, and at least for the verifyable part I heard
contrary statements.  Feel free to contact me offline to make sure we
can move this into the right direction.

> So let's not over-engineer this kernel patchset to support some broken
> vaporware, please.

Not sharing bio fields for integrity and encryption actually keeps
the patchset simpler (although uses more memory if both options are
enabled).  So my main point here is to not over-engineer it for a broken
premise that won't be true soon.
Eric Biggers Jan. 18, 2020, 5:11 a.m. UTC | #13
On Fri, Jan 17, 2020 at 12:32:21AM -0800, Christoph Hellwig wrote:
> 
> File systems don't move data around all that often (saying that with my
> fs developer hat on).  In traditional file systems only defragmentation
> will move data around, with extent refcounting it can also happen for
> dedup, and for file systems that write out of place data gets moved
> when parts of a block are rewritten, but in that case a read modify
> write cycle is performed in the Linux code anyway, so it will go through
> the inline encryption engine on the way in and the way out.
> 
> So in other words - specifying an IV would be useful for some use cases,
> but I don't think it is a deal blocker. Even without that it is useful
> for block device level encryption, and could have some usefulness for
> file system encryption usage.
> 
> I think that adding an IV would eventually be useful, but fitting that
> into NVMe won't be easy, as you'd need to find a way to specify the IV
> for each submission queue entry, which requires growing it, or finding
> some way to extend it out of band.

Sure, people have even done inline crypto on ext4 before (downstream), using the
LBA for the IV.  But log-structured filesystems like f2fs move data blocks
around *without the encryption key*; and at least for fscrypt, f2fs support is
essential.  In any case it's also awkward having the physical on-disk location
determine the ciphertext on-disk, as then the result isn't fully controlled by
the encryption settings you set, but also based on where your filesystem is
located on-disk (with extra fun occurring if there's any sort of remapping layer
in between).  But sure, it's not *useless* to not be able to specify the IV,
it's just annoying and less useful.

[I was also a bit surprised to see that NVMe won't actually allow specifying the
IV, as I thought you had objected to the naming of the INLINE_CRYPT_OPTIMIZED
fscrypt policy flag partly on the grounds that NVMe would support IVs longer
than the 64 bits that UFS is limited to.  Perhaps I misunderstood though.]

> > So let's not over-engineer this kernel patchset to support some broken
> > vaporware, please.
> 
> Not sharing bio fields for integrity and encryption actually keeps
> the patchset simpler (although uses more memory if both options are
> enabled).  So my main point here is to not over-engineer it for a broken
> premise that won't be true soon.

Well there are 3 options:

(a) Separate fields for bi_crypt_context and bi_integrity
(b) bi_crypt_context and bi_integrity in union
(c) One pointer that can support both features,
    e.g. linked list of tagged structs.

It sounds like you're advocating for (a), but I had misunderstood and thought
you're advocating for (c).  We'd of course be fine with (a) as it's the
simplest, but other people are saying they prefer (b).

Satya, to resolve this I think you should check how hard (b) is to implement --
i.e. is it easy, or is it really tricky to ensure the features are never used
together?  (Considering that it's probably not just a matter of whether any
hardware supports both features, as dm-integrity supports blk-integrity in
software and blk-crypto-fallback supports blk-crypto in software.)

- Eric
Satya Tangirala Jan. 21, 2020, 10:05 p.m. UTC | #14
On Fri, Jan 17, 2020 at 09:11:32PM -0800, Eric Biggers wrote:
> On Fri, Jan 17, 2020 at 12:32:21AM -0800, Christoph Hellwig wrote:
> > 
> > File systems don't move data around all that often (saying that with my
> > fs developer hat on).  In traditional file systems only defragmentation
> > will move data around, with extent refcounting it can also happen for
> > dedup, and for file systems that write out of place data gets moved
> > when parts of a block are rewritten, but in that case a read modify
> > write cycle is perfomed in the Linux code anyway, so it will go through
> > the inline encryption engined on the way and the way out.
> > 
> > So in other words - specifying an IV would be useful for some use cases,
> > but I don't think it is a deal blocker. Even without that it is useful
> > for block device level encryption, and could have some usefulness for
> > file system encryption usage.
> > 
> > I think that adding an IV would eventually be useful, but fitting that
> > into NVMe won't be easy, as you'd need to find a way to specify the IV
> > for each submission queue entry, which requires growing it, or finding
> > some way to extend it out of band.
> 
> Sure, people have even done inline crypto on ext4 before (downstream), using the
> LBA for the IV.  But log-structured filesystems like f2fs move data blocks
> around *without the encryption key*; and at least for fscrypt, f2fs support is
> essential.  In any case it's also awkward having the physical on-disk location
> determine the ciphertext on-disk, as then the result isn't fully controlled by
> the encryption settings you set, but also based on where your filesystem is
> located on-disk (with extra fun occurring if there's any sort of remapping layer
> in between).  But sure, it's not *useless* to not be able to specify the IV,
> it's just annoying and less useful.
> 
> [I was also a bit surprised to see that NVMe won't actually allow specifying the
> IV, as I thought you had objected to the naming of the INLINE_CRYPT_OPTIMIZED
> fscrypt policy flag partly on the grounds that NVMe would support IVs longer
> than the 64 bits that UFS is limited to.  Perhaps I misunderstood though.]
> 
> > > So let's not over-engineer this kernel patchset to support some broken
> > > vaporware, please.
> > 
> > Not sharing bio fields for integrity and encryption actually keeps
> > the patchset simpler (although uses more memory if both options are
> > enabled).  So my main point here is to not over-engineer it for a broken
> > premise that won't be true soon.
> 
> Well there are 3 options:
> 
> (a) Separate fields for bi_crypt_context and bi_integrity
> (b) bi_crypt_context and bi_integrity in union
> (c) One pointer that can support both features,
>     e.g. linked list of tagged structs.
> 
> It sounds like you're advocating for (a), but I had misunderstood and thought
> you're advocating for (c).  We'd of course be fine with (a) as it's the
> simplest, but other people are saying they prefer (b).
> 
> Satya, to resolve this I think you should check how hard (b) is to implement --
> i.e. is it easy, or is it really tricky to ensure the features are never used
> together?  (Considering that it's probably not just a matter of whether any
> hardware supports both features, as dm-integrity supports blk-integrity in
> software and blk-crypto-fallback supports blk-crypto in software.)
> 
> - Eric

What I have right now for v7 of the patch series is my attempt at (b) (since
some were saying they prefer it) - it may still be incomplete if I've missed
something, but here's what I think it involved:

1) bi_crypt_context is never set after bi_integrity, so I don't check if
   bi_integrity is set or not when setting bi_crypt_context - this keeps the
   code cleaner when setting the bi_crypt_context.

2) I made it an error whenever we try to set bi_integrity on a bio with
   REQ_BLK_CRYPTO (see the rough sketch after this list).

3) There is an issue with the way the blk-crypto-fallback interacts with bio
   integrity. One of the goals of the fallback is to make inline encryption
   transparent to the upper layers (i.e. as far as the upper layers are
   concerned there should be no difference whether there is actual hardware
   inline encryption support or whether the fallback is used instead).

   The issue is, when the fallback encrypts a bio, it clones the bio and
   encrypts the pages in the clone, but the clone won't have a bi_crypt_context
   (since otherwise, when the clone is resubmitted, blk-crypto would incorrectly
   assume that the clone needs to be encrypted once again). When bi_integrity is
   set on the clone, there won't be any error detected since REQ_BLK_CRYPTO
   won't be set on the clone. However, if hardware inline encryption support had
   been present, and blk-crypto had not used the fallback, the same bio
   would have bi_crypt_context set when we try to set bi_integrity, which would
   then fail.

   To fix this issue, I introduced another flag REQ_NO_SPECIAL (subject to being
   renamed when I think of/someone suggests a better name), which when set on
   the bio, indicates that bi_integrity (and in future whatever other fields
   share the same union) shouldn't be set on that bio. I can probably do away
   with the new flag and just set REQ_BLK_CRYPTO and also set
   bi_crypt_context = NULL to signify the same thing, but either way, it doesn't
   seem like a very clean solution to me.

4) I don't think there's actually an issue with dm-integrity, as long as, when
   we add support for inline encryption to dm, we ensure that if any of the
   child targets is dm-integrity, the dm device says it doesn't support any
   crypto mode. This way, encryption is always consistently done before
   integrity calculation, which is always consistently done before decryption
   (and when dm-integrity is not using the internal hash, the bio will fail
   because of (2), but this is still consistent whether or not hardware inline
   encryption support is present).
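
Roughly, (2) and (3) amount to a check along these lines (purely illustrative -
the helper is made up, REQ_NO_SPECIAL is the proposed flag, and the real check
might live in bio_integrity_alloc() or bio_integrity_prep()):

	/* Illustrative only; REQ_NO_SPECIAL is the proposed (unmerged) flag. */
	static bool bio_may_have_integrity(const struct bio *bio)
	{
		/* (2): a bio carrying a crypt context can't also carry an
		 * integrity payload, since the two would share a union. */
		if (bio->bi_opf & REQ_BLK_CRYPTO)
			return false;

		/* (3): a bio already encrypted by blk-crypto-fallback has no
		 * crypt context, but must still not grow an integrity payload,
		 * so the fallback marks it with a flag instead. */
		if (bio->bi_opf & REQ_NO_SPECIAL)
			return false;

		return true;
	}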

However, even if we decide to do (a), I'll still need to ensure that when bio
integrity and blk-crypto are used together, the results are consistent
regardless of whether hardware inline crypto support is present. So I'd either
disallow bio integrity from being used with blk-crypto - in which case I'd need
to do something similar to what I did in (3), like introduce REQ_NO_SPECIAL,
and we'd also lose the advantage (a) has of keeping the two fields out of a
union - or I'd allow them to be used together while ensuring consistent
results, e.g. by forcing blk-crypto to fall back to the crypto API whenever the
target device for a bio has a bio integrity profile (and whatever checks are
done in bio_integrity_prep) - see the sketch below.
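
That "force the fallback" option could look roughly like this (the helper name
and where it would be called from are made up for illustration;
blk_get_integrity() and bio->bi_disk are the existing interfaces):

	/*
	 * Illustrative only: if the bio's disk advertises an integrity
	 * profile, don't program the inline crypto hardware even if it
	 * supports the requested mode; let blk-crypto-fallback encrypt in
	 * software instead, so the ordering of encryption vs. integrity
	 * generation is the same with and without inline hardware.
	 */
	static bool bio_crypt_must_use_fallback(struct bio *bio)
	{
		return blk_get_integrity(bio->bi_disk) != NULL;
	}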

So I'm not sure that (a) is much easier or cleaner. But unlike (b), (a) does
give us the option of using the two features together. If it seems likely that
we'll want to use both features together at some point, then maybe I should
switch back to (a) and, for now, just disallow the two features from being used
together (since that's still easier than getting them both to work together),
and once we know for sure that we want to use both together, we can make the
necessary additions then?
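
For concreteness, the two layouts under discussion would look roughly like this
in struct bio ((a) is what the hunk at the end of this patch does; (b) is the
shared-union direction described above for v7):

	/* (a): separate fields - the two features can coexist on one bio */
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx		*bi_crypt_context;
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
	struct bio_integrity_payload	*bi_integrity;
#endif

	/* (b): shared union - the two features are mutually exclusive per bio */
	union {
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
		struct bio_crypt_ctx		*bi_crypt_context;
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
		struct bio_integrity_payload	*bi_integrity;
#endif
	};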
diff mbox series

Patch

diff --git a/block/Makefile b/block/Makefile
index 7c603669f216..79f2b8b3fc5d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -37,4 +37,4 @@  obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
 obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
 obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
 obj-$(CONFIG_BLK_PM)		+= blk-pm.o
-obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= keyslot-manager.o
\ No newline at end of file
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= keyslot-manager.o bio-crypt-ctx.o
\ No newline at end of file
diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c
new file mode 100644
index 000000000000..dadf0da3c21b
--- /dev/null
+++ b/block/bio-crypt-ctx.c
@@ -0,0 +1,131 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/keyslot-manager.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+
+static int num_prealloc_crypt_ctxs = 128;
+
+module_param(num_prealloc_crypt_ctxs, int, 0444);
+MODULE_PARM_DESC(num_prealloc_crypt_ctxs,
+		"Number of bio crypto contexts to preallocate");
+
+static struct kmem_cache *bio_crypt_ctx_cache;
+static mempool_t *bio_crypt_ctx_pool;
+
+int __init bio_crypt_ctx_init(void)
+{
+	bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0);
+	if (!bio_crypt_ctx_cache)
+		return -ENOMEM;
+
+	bio_crypt_ctx_pool = mempool_create_slab_pool(num_prealloc_crypt_ctxs,
+						      bio_crypt_ctx_cache);
+	if (!bio_crypt_ctx_pool)
+		return -ENOMEM;
+
+	/* This is assumed in various places. */
+	BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0);
+
+	return 0;
+}
+
+struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask)
+{
+	return mempool_alloc(bio_crypt_ctx_pool, gfp_mask);
+}
+
+void bio_crypt_free_ctx(struct bio *bio)
+{
+	mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool);
+	bio->bi_crypt_context = NULL;
+}
+
+void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask)
+{
+	const struct bio_crypt_ctx *src_bc = src->bi_crypt_context;
+
+	/*
+	 * If a bio is software-handled (by blk-crypto-fallback), then it will
+	 * be decrypted when bio_endio is called. As we only want the data to
+	 * be decrypted once, copies of the bio must not have a crypt context.
+	 */
+	if (!src_bc)
+		return;
+
+	dst->bi_crypt_context = bio_crypt_alloc_ctx(gfp_mask);
+	*dst->bi_crypt_context = *src_bc;
+
+	if (src_bc->bc_keyslot >= 0)
+		keyslot_manager_get_slot(src_bc->bc_ksm, src_bc->bc_keyslot);
+}
+EXPORT_SYMBOL_GPL(bio_crypt_clone);
+
+bool bio_crypt_should_process(struct request *rq)
+{
+	struct bio *bio = rq->bio;
+
+	if (!bio || !bio->bi_crypt_context)
+		return false;
+
+	return rq->q->ksm == bio->bi_crypt_context->bc_ksm;
+}
+EXPORT_SYMBOL_GPL(bio_crypt_should_process);
+
+/*
+ * Checks that two bio crypt contexts are compatible - i.e. that
+ * they are mergeable except for data_unit_num continuity.
+ */
+bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2)
+{
+	struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context;
+	struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context;
+
+	if (bc1 != bc2)
+		return false;
+
+	return !bc1 || bc1->bc_key == bc2->bc_key;
+}
+
+/*
+ * Checks that two bio crypt contexts are compatible, and also
+ * that their data_unit_nums are continuous (and can hence be merged)
+ * in the order b_1 followed by b_2.
+ */
+bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes,
+			     struct bio *b_2)
+{
+	struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context;
+	struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context;
+
+	if (!bio_crypt_ctx_compatible(b_1, b_2))
+		return false;
+
+	return !bc1 || bio_crypt_dun_is_contiguous(bc1, b1_bytes, bc2->bc_dun);
+}
+
+void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc)
+{
+	keyslot_manager_put_slot(bc->bc_ksm, bc->bc_keyslot);
+	bc->bc_ksm = NULL;
+	bc->bc_keyslot = -1;
+}
+
+int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc,
+				  struct keyslot_manager *ksm)
+{
+	int slot = keyslot_manager_get_slot_for_key(ksm, bc->bc_key);
+
+	if (slot < 0)
+		return slot;
+
+	bc->bc_keyslot = slot;
+	bc->bc_ksm = ksm;
+	return 0;
+}
diff --git a/block/bio.c b/block/bio.c
index a5d75f6bf4c7..c99e054d56ef 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -236,6 +236,8 @@  void bio_uninit(struct bio *bio)
 
 	if (bio_integrity(bio))
 		bio_integrity_free(bio);
+
+	bio_crypt_free_ctx(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -615,15 +617,12 @@  struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 
 	__bio_clone_fast(b, bio);
 
-	if (bio_integrity(bio)) {
-		int ret;
-
-		ret = bio_integrity_clone(b, bio, gfp_mask);
+	bio_crypt_clone(b, bio, gfp_mask);
 
-		if (ret < 0) {
-			bio_put(b);
-			return NULL;
-		}
+	if (bio_integrity(bio) &&
+	    bio_integrity_clone(b, bio, gfp_mask) < 0) {
+		bio_put(b);
+		return NULL;
 	}
 
 	return b;
@@ -997,6 +996,7 @@  void bio_advance(struct bio *bio, unsigned bytes)
 	if (bio_integrity(bio))
 		bio_integrity_advance(bio, bytes);
 
+	bio_crypt_advance(bio, bytes);
 	bio_advance_iter(bio, &bio->bi_iter, bytes);
 }
 EXPORT_SYMBOL(bio_advance);
diff --git a/block/blk-core.c b/block/blk-core.c
index e0a094fddee5..5200f4d1fed4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1810,5 +1810,8 @@  int __init blk_dev_init(void)
 	blk_debugfs_root = debugfs_create_dir("block", NULL);
 #endif
 
+	if (bio_crypt_ctx_init() < 0)
+		panic("Failed to allocate mem for bio crypt ctxs\n");
+
 	return 0;
 }
diff --git a/block/blk-merge.c b/block/blk-merge.c
index d783bdc4559b..5e53aad97da9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -596,6 +596,8 @@  int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
 		req_set_nomerge(req->q, req);
 		return 0;
 	}
+	if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), bio))
+		return 0;
 
 	return ll_new_hw_segment(req, bio, nr_segs);
 }
@@ -612,6 +614,8 @@  int ll_front_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs
 		req_set_nomerge(req->q, req);
 		return 0;
 	}
+	if (!bio_crypt_ctx_mergeable(bio, bio->bi_iter.bi_size, req->bio))
+		return 0;
 
 	return ll_new_hw_segment(req, bio, nr_segs);
 }
@@ -656,6 +660,9 @@  static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	if (blk_integrity_merge_rq(q, req, next) == false)
 		return 0;
 
+	if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), next->bio))
+		return 0;
+
 	/* Merge is OK... */
 	req->nr_phys_segments = total_phys_segments;
 	return 1;
@@ -895,6 +902,10 @@  bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (rq->ioprio != bio_prio(bio))
 		return false;
 
+	/* Only merge if the crypt contexts are compatible */
+	if (!bio_crypt_ctx_compatible(bio, rq->bio))
+		return false;
+
 	return true;
 }
 
diff --git a/block/bounce.c b/block/bounce.c
index f8ed677a1bf7..aa57ccc6ced3 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -267,14 +267,12 @@  static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 		break;
 	}
 
-	if (bio_integrity(bio_src)) {
-		int ret;
+	bio_crypt_clone(bio, bio_src, gfp_mask);
 
-		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
-		if (ret < 0) {
-			bio_put(bio);
-			return NULL;
-		}
+	if (bio_integrity(bio_src) &&
+	    bio_integrity_clone(bio, bio_src, gfp_mask) < 0) {
+		bio_put(bio);
+		return NULL;
 	}
 
 	bio_clone_blkg_association(bio, bio_src);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e8f9661a10a1..783e0d5fd130 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1304,9 +1304,10 @@  static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 
 	__bio_clone_fast(clone, bio);
 
+	bio_crypt_clone(clone, bio, GFP_NOIO);
+
 	if (bio_integrity(bio)) {
 		int r;
-
 		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
 			     !dm_target_passes_integrity(tio->ti->type))) {
 			DMWARN("%s: the target %s doesn't support integrity data.",
diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h
index dd4ac9d95428..4535df0a6349 100644
--- a/include/linux/bio-crypt-ctx.h
+++ b/include/linux/bio-crypt-ctx.h
@@ -8,7 +8,7 @@ 
 enum blk_crypto_mode_num {
 	BLK_ENCRYPTION_MODE_INVALID,
 	BLK_ENCRYPTION_MODE_AES_256_XTS,
-	BLK_ENCRYPTION_MODE_AES_128_CBC,
+	BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV,
 	BLK_ENCRYPTION_MODE_ADIANTUM,
 	BLK_ENCRYPTION_MODE_MAX,
 };
@@ -44,6 +44,150 @@  struct blk_crypto_key {
 	u8 raw[BLK_CRYPTO_MAX_KEY_SIZE];
 };
 
+#define BLK_CRYPTO_MAX_IV_SIZE		32
+#define BLK_CRYPTO_DUN_ARRAY_SIZE	(BLK_CRYPTO_MAX_IV_SIZE/sizeof(u64))
+
+/**
+ * struct bio_crypt_ctx - an inline encryption context
+ * @bc_key: the key, algorithm, and data unit size to use
+ * @bc_keyslot: the keyslot that has been assigned for this key in @bc_ksm,
+ *		or -1 if no keyslot has been assigned yet.
+ * @bc_dun: the data unit number (starting IV) to use
+ * @bc_ksm: the keyslot manager into which the key has been programmed with
+ *	    @bc_keyslot, or NULL if this key hasn't yet been programmed.
+ *
+ * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for
+ * write requests) or decrypted (for read requests) inline by the storage device
+ * or controller, or by the crypto API fallback.
+ */
+struct bio_crypt_ctx {
+	const struct blk_crypto_key	*bc_key;
+	int				bc_keyslot;
+
+	/* Data unit number */
+	u64				bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
+
+	/*
+	 * The keyslot manager where the key has been programmed
+	 * with keyslot.
+	 */
+	struct keyslot_manager		*bc_ksm;
+};
+
+int bio_crypt_ctx_init(void);
+
+struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask);
+
+void bio_crypt_free_ctx(struct bio *bio);
+
+static inline bool bio_has_crypt_ctx(struct bio *bio)
+{
+	return bio->bi_crypt_context;
+}
+
+void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask);
+
+static inline void bio_crypt_set_ctx(struct bio *bio,
+				     const struct blk_crypto_key *key,
+				     u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+				     gfp_t gfp_mask)
+{
+	struct bio_crypt_ctx *bc = bio_crypt_alloc_ctx(gfp_mask);
+
+	bc->bc_key = key;
+	memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun));
+	bc->bc_ksm = NULL;
+	bc->bc_keyslot = -1;
+
+	bio->bi_crypt_context = bc;
+}
+
+void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc);
+
+int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc,
+				  struct keyslot_manager *ksm);
+
+struct request;
+bool bio_crypt_should_process(struct request *rq);
+
+static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc,
+					       unsigned int bytes,
+					u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE])
+{
+	int i = 0;
+	unsigned int inc = bytes >> bc->bc_key->data_unit_size_bits;
+
+	while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) {
+		if (bc->bc_dun[i] + inc != next_dun[i])
+			return false;
+		inc = ((bc->bc_dun[i] + inc) < inc);
+		i++;
+	}
+
+	return true;
+}
+
+
+static inline void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE],
+					   unsigned int inc)
+{
+	int i = 0;
+
+	while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) {
+		dun[i] += inc;
+		inc = (dun[i] < inc);
+		i++;
+	}
+}
+
+static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes)
+{
+	struct bio_crypt_ctx *bc = bio->bi_crypt_context;
+
+	if (!bc)
+		return;
+
+	bio_crypt_dun_increment(bc->bc_dun,
+				bytes >> bc->bc_key->data_unit_size_bits);
+}
+
+bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2);
+
+bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes,
+			     struct bio *b_2);
+
+#else /* CONFIG_BLK_INLINE_ENCRYPTION */
+static inline int bio_crypt_ctx_init(void)
+{
+	return 0;
+}
+
+static inline bool bio_has_crypt_ctx(struct bio *bio)
+{
+	return false;
+}
+
+static inline void bio_crypt_clone(struct bio *dst, struct bio *src,
+				   gfp_t gfp_mask) { }
+
+static inline void bio_crypt_free_ctx(struct bio *bio) { }
+
+static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) { }
+
+static inline bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2)
+{
+	return true;
+}
+
+static inline bool bio_crypt_ctx_mergeable(struct bio *b_1,
+					   unsigned int b1_bytes,
+					   struct bio *b_2)
+{
+	return true;
+}
+
 #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
+
 #endif /* CONFIG_BLOCK */
+
 #endif /* __LINUX_BIO_CRYPT_CTX_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 70254ae11769..1996689c51d3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -18,6 +18,7 @@  struct block_device;
 struct io_context;
 struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
+struct bio_crypt_ctx;
 
 /*
  * Block error status values.  See block/blk-core:blk_errors for the details.
@@ -173,6 +174,11 @@  struct bio {
 	u64			bi_iocost_cost;
 #endif
 #endif
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+	struct bio_crypt_ctx	*bi_crypt_context;
+#endif
+
 	union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 		struct bio_integrity_payload *bi_integrity; /* data integrity */