diff mbox series

[PATCHv3,08/10] block: add pi for nvme enhanced integrity

Message ID 20220222163144.1782447-9-kbusch@kernel.org (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series 64-bit data integrity field support | expand

Commit Message

Keith Busch Feb. 22, 2022, 4:31 p.m. UTC
The NVMe specification defines new data integrity formats beyond the
t10 tuple. Add support for the specification defined CRC64 formats,
assuming the reference tag does not need to be split with the "storage
tag".

Cc: Hannes Reinecke <hare@suse.de>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/Kconfig          |   1 +
 block/t10-pi.c         | 194 +++++++++++++++++++++++++++++++++++++++++
 include/linux/t10-pi.h |  20 +++++
 3 files changed, 215 insertions(+)

Comments

Christoph Hellwig Feb. 25, 2022, 4:14 p.m. UTC | #1
On Tue, Feb 22, 2022 at 08:31:42AM -0800, Keith Busch wrote:
> +static __be64 nvme_pi_crc64(void *data, unsigned int len)
> +{
> +	return cpu_to_be64(crc64_rocksoft(data, len));
> +}
> +
> +static blk_status_t nvme_crc64_generate(struct blk_integrity_iter *iter,
> +					enum t10_dif_type type)

Shouldn't the naming be something more like ext_pi_*?  For one thing
I kinda hate having the nvme prefix here in block layer code, but also
nvme supports the normal 8 byte PI tuples, so this is a bit confusing.

> +{
> +	unsigned int i;
> +
> +	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
> +		struct nvme_crc64_pi_tuple *pi = iter->prot_buf;
> +
> +		pi->guard_tag = nvme_pi_crc64(iter->data_buf, iter->interval);
> +		pi->app_tag = 0;
> +
> +		if (type == T10_PI_TYPE1_PROTECTION)
> +			put_unaligned_be48(iter->seed, pi->ref_tag);
> +		else
> +			put_unaligned_be48(0ULL, pi->ref_tag);
> +
> +		iter->data_buf += iter->interval;
> +		iter->prot_buf += iter->tuple_size;
> +		iter->seed++;
> +	}
> +
> +	return BLK_STS_OK;
> +}
> +
> +static bool nvme_crc64_ref_escape(u8 *ref_tag)
> +{
> +	static u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
> +
> +	return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
> +}
> +
> +static blk_status_t nvme_crc64_verify(struct blk_integrity_iter *iter,
> +				      enum t10_dif_type type)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < iter->data_size; i += iter->interval) {
> +		struct nvme_crc64_pi_tuple *pi = iter->prot_buf;
> +		u64 ref, seed;
> +		__be64 csum;
> +
> +		if (type == T10_PI_TYPE1_PROTECTION) {
> +			if (pi->app_tag == T10_PI_APP_ESCAPE)
> +				goto next;
> +
> +			ref = get_unaligned_be48(pi->ref_tag);
> +			seed = lower_48_bits(iter->seed);
> +			if (ref != seed) {
> +				pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
> +					iter->disk_name, seed, ref);
> +				return BLK_STS_PROTECTION;
> +			}
> +		} else if (type == T10_PI_TYPE3_PROTECTION) {
> +			if (pi->app_tag == T10_PI_APP_ESCAPE &&
> +			    nvme_crc64_ref_escape(pi->ref_tag))
> +				goto next;
> +		}
> +
> +		csum = nvme_pi_crc64(iter->data_buf, iter->interval);
> +		if (pi->guard_tag != csum) {
> +			pr_err("%s: guard tag error at sector %llu " \
> +			       "(rcvd %016llx, want %016llx)\n",
> +				iter->disk_name, (unsigned long long)iter->seed,
> +				be64_to_cpu(pi->guard_tag), be64_to_cpu(csum));
> +			return BLK_STS_PROTECTION;
> +		}
> +
> +next:
> +		iter->data_buf += iter->interval;
> +		iter->prot_buf += iter->tuple_size;
> +		iter->seed++;
> +	}
> +
> +	return BLK_STS_OK;
> +}
> +
> +static blk_status_t nvme_pi_type1_verify_crc(struct blk_integrity_iter *iter)
> +{
> +	return nvme_crc64_verify(iter, T10_PI_TYPE1_PROTECTION);
> +}
> +
> +static blk_status_t nvme_pi_type1_generate_crc(struct blk_integrity_iter *iter)
> +{
> +	return nvme_crc64_generate(iter, T10_PI_TYPE1_PROTECTION);
> +}
> +
> +static void nvme_pi_type1_prepare(struct request *rq)
> +{
> +	const int tuple_sz = rq->q->integrity.tuple_size;
> +	u64 ref_tag = nvme_pi_extended_ref_tag(rq);
> +	struct bio *bio;
> +
> +	__rq_for_each_bio(bio, rq) {
> +		struct bio_integrity_payload *bip = bio_integrity(bio);
> +		u64 virt = lower_48_bits(bip_get_seed(bip));
> +		struct bio_vec iv;
> +		struct bvec_iter iter;
> +
> +		/* Already remapped? */
> +		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
> +			break;
> +
> +		bip_for_each_vec(iv, bip, iter) {
> +			unsigned int j;
> +			void *p;
> +
> +			p = bvec_kmap_local(&iv);
> +			for (j = 0; j < iv.bv_len; j += tuple_sz) {
> +				struct nvme_crc64_pi_tuple *pi = p;
> +				u64 ref = get_unaligned_be48(pi->ref_tag);
> +
> +				if (ref == virt)
> +					put_unaligned_be48(ref_tag, pi->ref_tag);
> +				virt++;
> +				ref_tag++;
> +				p += tuple_sz;
> +			}
> +			kunmap_local(p);
> +		}
> +
> +		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
> +	}
> +}
> +
> +static void nvme_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
> +{
> +	unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
> +	const int tuple_sz = rq->q->integrity.tuple_size;
> +	u64 ref_tag = nvme_pi_extended_ref_tag(rq);
> +	struct bio *bio;
> +
> +	__rq_for_each_bio(bio, rq) {
> +		struct bio_integrity_payload *bip = bio_integrity(bio);
> +		u64 virt = lower_48_bits(bip_get_seed(bip));
> +		struct bio_vec iv;
> +		struct bvec_iter iter;
> +
> +		bip_for_each_vec(iv, bip, iter) {
> +			unsigned int j;
> +			void *p;
> +
> +			p = bvec_kmap_local(&iv);
> +			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
> +				struct nvme_crc64_pi_tuple *pi = p;
> +				u64 ref = get_unaligned_be48(pi->ref_tag);
> +
> +				if (ref == ref_tag)
> +					put_unaligned_be48(virt, pi->ref_tag);
> +				virt++;
> +				ref_tag++;
> +				intervals--;
> +				p += tuple_sz;
> +			}
> +			kunmap_local(p);
> +		}
> +	}
> +}
> +
> +static blk_status_t nvme_pi_type3_verify_crc(struct blk_integrity_iter *iter)
> +{
> +	return nvme_crc64_verify(iter, T10_PI_TYPE3_PROTECTION);
> +}
> +
> +static blk_status_t nvme_pi_type3_generate_crc(struct blk_integrity_iter *iter)
> +{
> +	return nvme_crc64_generate(iter, T10_PI_TYPE3_PROTECTION);
> +}
> +
> +const struct blk_integrity_profile nvme_pi_type1_crc64 = {
> +	.name			= "NVME-DIF-TYPE1-CRC64",
> +	.generate_fn		= nvme_pi_type1_generate_crc,
> +	.verify_fn		= nvme_pi_type1_verify_crc,
> +	.prepare_fn		= nvme_pi_type1_prepare,
> +	.complete_fn		= nvme_pi_type1_complete,
> +};
> +EXPORT_SYMBOL(nvme_pi_type1_crc64);

All this should probably be EXPORT_SYMBOL_GPL (and I have a series
pending that would remove the exports of the profiles entirely,
but that will need some major rework after your series goes in).

The actual code looks fine to me.
Martin K. Petersen March 2, 2022, 3:15 a.m. UTC | #2
Christoph,

>> +static blk_status_t nvme_crc64_generate(struct blk_integrity_iter *iter,
>> +					enum t10_dif_type type)
>
> Shouldn't the naming be something more like ext_pi_*?  For one thing
> I kinda hate having the nvme prefix here in block layer code, but also
> nvme supports the normal 8 byte PI tuples, so this is a bit confusing.

The rationale behind the original t10 prefix was that the format was
defined by the T10 organization. At the time a T13 format was also on
the table. So from that perspective, using nvme_ here is correct. I do
like ext_ better, though.

I don't particularly appreciate the way the new formats were defined in
NVMe. I would have preferred new types instead of this "just like type N
except for all these differences" approach. But that comes from NVMe
completely missing how DIX removed all the format type knowledge from
the controller/device and instead put the burden on the driver to tell
the device what and how to check.

In any case: Naming is hard, the code looks fine to me.

Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
diff mbox series

Patch

diff --git a/block/Kconfig b/block/Kconfig
index 168b873eb666..ce3d8d63c199 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -75,6 +75,7 @@  config BLK_DEV_INTEGRITY_T10
 	tristate
 	depends on BLK_DEV_INTEGRITY
 	select CRC_T10DIF
+	select CRC64_ROCKSOFT
 
 config BLK_DEV_ZONED
 	bool "Zoned block device support"
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 758a76518854..449d62782d79 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -7,8 +7,10 @@ 
 #include <linux/t10-pi.h>
 #include <linux/blk-integrity.h>
 #include <linux/crc-t10dif.h>
+#include <linux/crc64.h>
 #include <linux/module.h>
 #include <net/checksum.h>
+#include <asm/unaligned.h>
 
 typedef __be16 (csum_fn) (void *, unsigned int);
 
@@ -278,4 +280,196 @@  const struct blk_integrity_profile t10_pi_type3_ip = {
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
 
+static __be64 nvme_pi_crc64(void *data, unsigned int len)
+{
+	return cpu_to_be64(crc64_rocksoft(data, len));
+}
+
+static blk_status_t nvme_crc64_generate(struct blk_integrity_iter *iter,
+					enum t10_dif_type type)
+{
+	unsigned int i;
+
+	for (i = 0 ; i < iter->data_size ; i += iter->interval) {
+		struct nvme_crc64_pi_tuple *pi = iter->prot_buf;
+
+		pi->guard_tag = nvme_pi_crc64(iter->data_buf, iter->interval);
+		pi->app_tag = 0;
+
+		if (type == T10_PI_TYPE1_PROTECTION)
+			put_unaligned_be48(iter->seed, pi->ref_tag);
+		else
+			put_unaligned_be48(0ULL, pi->ref_tag);
+
+		iter->data_buf += iter->interval;
+		iter->prot_buf += iter->tuple_size;
+		iter->seed++;
+	}
+
+	return BLK_STS_OK;
+}
+
+static bool nvme_crc64_ref_escape(u8 *ref_tag)
+{
+	static u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+	return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
+}
+
+static blk_status_t nvme_crc64_verify(struct blk_integrity_iter *iter,
+				      enum t10_dif_type type)
+{
+	unsigned int i;
+
+	for (i = 0; i < iter->data_size; i += iter->interval) {
+		struct nvme_crc64_pi_tuple *pi = iter->prot_buf;
+		u64 ref, seed;
+		__be64 csum;
+
+		if (type == T10_PI_TYPE1_PROTECTION) {
+			if (pi->app_tag == T10_PI_APP_ESCAPE)
+				goto next;
+
+			ref = get_unaligned_be48(pi->ref_tag);
+			seed = lower_48_bits(iter->seed);
+			if (ref != seed) {
+				pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
+					iter->disk_name, seed, ref);
+				return BLK_STS_PROTECTION;
+			}
+		} else if (type == T10_PI_TYPE3_PROTECTION) {
+			if (pi->app_tag == T10_PI_APP_ESCAPE &&
+			    nvme_crc64_ref_escape(pi->ref_tag))
+				goto next;
+		}
+
+		csum = nvme_pi_crc64(iter->data_buf, iter->interval);
+		if (pi->guard_tag != csum) {
+			pr_err("%s: guard tag error at sector %llu " \
+			       "(rcvd %016llx, want %016llx)\n",
+				iter->disk_name, (unsigned long long)iter->seed,
+				be64_to_cpu(pi->guard_tag), be64_to_cpu(csum));
+			return BLK_STS_PROTECTION;
+		}
+
+next:
+		iter->data_buf += iter->interval;
+		iter->prot_buf += iter->tuple_size;
+		iter->seed++;
+	}
+
+	return BLK_STS_OK;
+}
+
+static blk_status_t nvme_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+{
+	return nvme_crc64_verify(iter, T10_PI_TYPE1_PROTECTION);
+}
+
+static blk_status_t nvme_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+{
+	return nvme_crc64_generate(iter, T10_PI_TYPE1_PROTECTION);
+}
+
+static void nvme_pi_type1_prepare(struct request *rq)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u64 ref_tag = nvme_pi_extended_ref_tag(rq);
+	struct bio *bio;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u64 virt = lower_48_bits(bip_get_seed(bip));
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		bip_for_each_vec(iv, bip, iter) {
+			unsigned int j;
+			void *p;
+
+			p = bvec_kmap_local(&iv);
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				struct nvme_crc64_pi_tuple *pi = p;
+				u64 ref = get_unaligned_be48(pi->ref_tag);
+
+				if (ref == virt)
+					put_unaligned_be48(ref_tag, pi->ref_tag);
+				virt++;
+				ref_tag++;
+				p += tuple_sz;
+			}
+			kunmap_local(p);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+
+static void nvme_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
+{
+	unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u64 ref_tag = nvme_pi_extended_ref_tag(rq);
+	struct bio *bio;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u64 virt = lower_48_bits(bip_get_seed(bip));
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		bip_for_each_vec(iv, bip, iter) {
+			unsigned int j;
+			void *p;
+
+			p = bvec_kmap_local(&iv);
+			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
+				struct nvme_crc64_pi_tuple *pi = p;
+				u64 ref = get_unaligned_be48(pi->ref_tag);
+
+				if (ref == ref_tag)
+					put_unaligned_be48(virt, pi->ref_tag);
+				virt++;
+				ref_tag++;
+				intervals--;
+				p += tuple_sz;
+			}
+			kunmap_local(p);
+		}
+	}
+}
+
+static blk_status_t nvme_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+	return nvme_crc64_verify(iter, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t nvme_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+	return nvme_crc64_generate(iter, T10_PI_TYPE3_PROTECTION);
+}
+
+const struct blk_integrity_profile nvme_pi_type1_crc64 = {
+	.name			= "NVME-DIF-TYPE1-CRC64",
+	.generate_fn		= nvme_pi_type1_generate_crc,
+	.verify_fn		= nvme_pi_type1_verify_crc,
+	.prepare_fn		= nvme_pi_type1_prepare,
+	.complete_fn		= nvme_pi_type1_complete,
+};
+EXPORT_SYMBOL(nvme_pi_type1_crc64);
+
+const struct blk_integrity_profile nvme_pi_type3_crc64 = {
+	.name			= "NVME-DIF-TYPE3-CRC64",
+	.generate_fn		= nvme_pi_type3_generate_crc,
+	.verify_fn		= nvme_pi_type3_verify_crc,
+	.prepare_fn		= t10_pi_type3_prepare,
+	.complete_fn		= t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(nvme_pi_type3_crc64);
+
+MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c635c2e014e3..a246ab3840bc 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -53,4 +53,24 @@  extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
+struct nvme_crc64_pi_tuple {
+	__be64 guard_tag;
+	__be16 app_tag;
+	__u8   ref_tag[6];
+};
+
+static inline u64 nvme_pi_extended_ref_tag(struct request *rq)
+{
+	unsigned int shift = ilog2(queue_logical_block_size(rq->q));
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (rq->q->integrity.interval_exp)
+		shift = rq->q->integrity.interval_exp;
+#endif
+	return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
+}
+
+extern const struct blk_integrity_profile nvme_pi_type1_crc64;
+extern const struct blk_integrity_profile nvme_pi_type3_crc64;
+
 #endif