diff mbox series

[2/2] riscv: Optimize crct10dif with zbc extension

Message ID 20241113104036.254491-3-zhihang.shao.iscas@gmail.com (mailing list archive)
State New
Headers show
Series RISC-V CRC-T10DIF optimization with zbc extension and CRC-T10DIF selftest | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Zhihang Shao Nov. 13, 2024, 10:40 a.m. UTC
The current CRC-T10DIF algorithm is based on table-lookup optimization.
Given the previous work on optimizing crc32 calculations with the Zbc
extension, the same approach is expected to be equally effective for
accelerating crc-t10dif.
Therefore, this patch provides an implementation of crc-t10dif using the
Zbc extension. It detects whether the current runtime environment
supports the Zbc feature and, if so, uses it to accelerate crc-t10dif
calculations.

This patch was tested in a QEMU VM with the kernel CRC-T10DIF selftests.

Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
---
 arch/riscv/crypto/Kconfig               |  14 ++
 arch/riscv/crypto/Makefile              |   4 +
 arch/riscv/crypto/crct10dif-riscv-zbc.c | 182 ++++++++++++++++++++++++
 lib/crct10diftest.c                     |   4 +-
 4 files changed, 202 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/crypto/crct10dif-riscv-zbc.c

Comments

Chunyan Zhang Nov. 14, 2024, 7:04 a.m. UTC | #1
On Wed, 13 Nov 2024 at 19:06, Zhihang Shao <zhihang.shao.iscas@gmail.com> wrote:
>
> The current CRC-T10DIF algorithm is based on table-lookup optimization.
> Given the previous work on optimizing crc32 calculations with zbc
> extension, it is believed that this will be equally effective for
> accelerating crc-t10dif.
> Therefore, this patch offers a implementation of crc-t10dif using zbc
> extension. This can detect whether the current runtime environment
> supports zbc feature and, if so, uses it to accelerate crc-t10dif
> calculations.
>
> This patch is tested on QEMU VM with the kernel CRC-T10DIF selftests.
>
> Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
> ---
>  arch/riscv/crypto/Kconfig               |  14 ++
>  arch/riscv/crypto/Makefile              |   4 +
>  arch/riscv/crypto/crct10dif-riscv-zbc.c | 182 ++++++++++++++++++++++++
>  lib/crct10diftest.c                     |   4 +-
>  4 files changed, 202 insertions(+), 2 deletions(-)
>  create mode 100644 arch/riscv/crypto/crct10dif-riscv-zbc.c
>
> diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
> index ad58dad9a580..69751ff5573e 100644
> --- a/arch/riscv/crypto/Kconfig
> +++ b/arch/riscv/crypto/Kconfig
> @@ -29,6 +29,20 @@ config CRYPTO_CHACHA_RISCV64
>           Architecture: riscv64 using:
>           - Zvkb vector crypto extension
>
> +config CRYPTO_CRCT10DIF_RISCV
> +       tristate "Checksum: CRCT10DIF"
> +       depends on TOOLCHAIN_HAS_ZBC
> +       depends on MMU
> +       depends on RISCV_ALTERNATIVE
> +       default y
> +       help
> +         CRCT10DIF checksum with Zbc extension optimized
> +        To accelerate CRCT10DIF checksum, choose Y here.

This help-text line is misaligned: it should use the same indentation as
the existing entries in this file (one tab plus two spaces).

> +
> +         Architecture: riscv using:
> +          - Zbc extension

Same here.

> +
> +

One empty line is enough here; the redundant extra empty line should be removed.

>  config CRYPTO_GHASH_RISCV64
>         tristate "Hash functions: GHASH"
>         depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
> diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
> index 247c7bc7288c..6f849f4dc4cc 100644
> --- a/arch/riscv/crypto/Makefile
> +++ b/arch/riscv/crypto/Makefile
> @@ -7,6 +7,9 @@ aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
>  obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
>  chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
>
> +obj-$(CONFIG_CRYPTO_CRCT10DIF_RISCV) += crct10dif-riscv.o
> +crct10dif-riscv-y := crct10dif-riscv-zbc.o
> +
>  obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
>  ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
>
> @@ -21,3 +24,4 @@ sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o
>
>  obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o
>  sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o
> +
> diff --git a/arch/riscv/crypto/crct10dif-riscv-zbc.c b/arch/riscv/crypto/crct10dif-riscv-zbc.c
> new file mode 100644
> index 000000000000..01571b4286f1
> --- /dev/null
> +++ b/arch/riscv/crypto/crct10dif-riscv-zbc.c
> @@ -0,0 +1,182 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Accelerated CRC-T10DIF implementation with RISC-V Zbc extension.
> + *
> + * Copyright (C) 2024 Institute of Software, CAS.
> + */
> +
> +#include <asm/alternative-macros.h>
> +#include <asm/byteorder.h>
> +#include <asm/hwcap.h>
> +
> +#include <crypto/internal/hash.h>
> +
> +#include <linux/byteorder/generic.h>
> +#include <linux/crc-t10dif.h>
> +#include <linux/minmax.h>
> +#include <linux/module.h>
> +#include <linux/types.h>
> +
> +static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len);
> +
> +#define CRCT10DIF_POLY 0x8bb7
> +
> +#if __riscv_xlen == 64
> +#define STEP_ORDER 3
> +
> +#define CRCT10DIF_POLY_QT_BE 0xf65a57f81d33a48a
> +
> +static inline u64 crct10dif_prep(u16 crc, unsigned long const *ptr)
> +{
> +       return ((u64)crc << 48) ^ (__force u64)__cpu_to_be64(*ptr);
> +}
> +
> +#elif __riscv_xlen == 32
> +#define STEP_ORDER 2
> +#define CRCT10DIF_POLY_QT_BE 0xf65a57f8
> +
> +static inline u32 crct10dif_prep(u16 crc, unsigned long const *ptr)
> +{
> +       return ((u32)crc << 16) ^ (__force u32)__cpu_to_be32(*ptr);
> +}
> +
> +#else
> +#error "Unexpected __riscv_xlen"
> +#endif
> +
> +static inline u16 crct10dif_zbc(unsigned long s)
> +{
> +       u16 crc;
> +
> +       asm volatile   (".option push\n"
> +                       ".option arch,+zbc\n"
> +                       "clmulh %0, %1, %2\n"
> +                       "xor    %0, %0, %1\n"
> +                       "clmul  %0, %0, %3\n"
> +                       ".option pop\n"
> +                       : "=&r" (crc)
> +                       : "r"(s),
> +                         "r"(CRCT10DIF_POLY_QT_BE),
> +                         "r"(CRCT10DIF_POLY)
> +                       :);
> +
> +       return crc;
> +}
> +
> +#define STEP (1 << STEP_ORDER)
> +#define OFFSET_MASK (STEP - 1)
> +
> +static inline u16 crct10dif_unaligned(u16 crc, unsigned char const *p, size_t len)
> +{
> +       size_t bits = len * 8;
> +       unsigned long s = 0;
> +       u16 crc_low = 0;
> +
> +       for (int i = 0; i < len; i++)
> +               s = *p++ | (s << 8);
> +
> +       if (len < sizeof(u16)) {
> +               s ^= crc >> (16 - bits);
> +               crc_low = crc << bits;
> +       } else {
> +               s ^= (unsigned long)crc << (bits - 16);
> +       }
> +
> +       crc = crct10dif_zbc(s);
> +       crc ^= crc_low;
> +
> +       return crc;
> +}
> +
> +static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len)
> +{
> +       size_t offset, head_len, tail_len;
> +       unsigned long const *p_ul;
> +       unsigned long s;
> +
> +       offset = (unsigned long)p & OFFSET_MASK;
> +       if (offset && len) {
> +               head_len = min(STEP - offset, len);
> +               crc = crct10dif_unaligned(crc, p, head_len);
> +               p += head_len;
> +               len -= head_len;
> +       }
> +
> +       tail_len = len & OFFSET_MASK;
> +       len = len >> STEP_ORDER;
> +       p_ul = (unsigned long const *)p;
> +
> +       for (int i = 0; i < len; i++) {
> +               s = crct10dif_prep(crc, p_ul);
> +               crc = crct10dif_zbc(s);
> +               p_ul++;
> +       }
> +
> +       p = (unsigned char const *)p_ul;
> +       if (tail_len)
> +               crc = crct10dif_unaligned(crc, p, tail_len);
> +
> +       return crc;
> +}
> +
> +static int crc_t10dif_init(struct shash_desc *desc)
> +{
> +       u16 *crc = shash_desc_ctx(desc);
> +
> +       *crc = 0;
> +
> +       return 0;
> +}
> +
> +static int crc_t10dif_final(struct shash_desc *desc, u8 *out)
> +{
> +       u16 *crc = shash_desc_ctx(desc);
> +
> +       *(u16 *)out = *crc;
> +
> +       return 0;
> +}
> +
> +static int crc_t10dif_update_zbc(struct shash_desc *desc, const u8 *data,
> +                               unsigned int length)
> +{
> +       u16 *crc = shash_desc_ctx(desc);
> +
> +       *crc = crc_t10dif_generic_zbc(*crc, data, length);
> +
> +       return 0;
> +}
> +
> +static struct shash_alg crc_t10dif_alg = {
> +       .digestsize             = CRC_T10DIF_DIGEST_SIZE,
> +       .init                   = crc_t10dif_init,
> +       .update                 = crc_t10dif_update_zbc,
> +       .final                  = crc_t10dif_final,
> +       .descsize               = CRC_T10DIF_DIGEST_SIZE,
> +
> +       .base.cra_name          = "crct10dif",
> +       .base.cra_driver_name   = "crct10dif-riscv-zbc",
> +       .base.cra_priority      = 150,
> +       .base.cra_blocksize     = CRC_T10DIF_BLOCK_SIZE,
> +       .base.cra_module        = THIS_MODULE,
> +};
> +
> +static int __init crc_t10dif_mod_init(void)
> +{
> +       if (riscv_isa_extension_available(NULL, ZBC))
> +               return crypto_register_shash(&crc_t10dif_alg);
> +
> +       return -ENODEV;
> +}
> +
> +static void __exit crc_t10dif_mod_exit(void)
> +{
> +       crypto_unregister_shash(&crc_t10dif_alg);
> +}
> +
> +module_init(crc_t10dif_mod_init);
> +module_exit(crc_t10dif_mod_exit);
> +
> +MODULE_DESCRIPTION("CRC-T10DIF using RISC-V ZBC Extension");
> +MODULE_ALIAS_CRYPTO("crct10dif");
> +MODULE_LICENSE("GPL");
> diff --git a/lib/crct10diftest.c b/lib/crct10diftest.c
> index 9541892eb12c..ac642191e6e8 100644
> --- a/lib/crct10diftest.c
> +++ b/lib/crct10diftest.c
> @@ -666,9 +666,9 @@ static int __init crct10dif_test_init(void)
>
>         local_irq_restore(flags);
>
> -       if (errors)
> +       if (errors)

This whitespace-only change should be removed from this patch.

>                 pr_warn("crct10dif: %d self tests failed\n", errors);
> -       else
> +       else

Same here.

>                 pr_info("crct10dif: self tests passed, processed %d bytes in %lld nsec\n",
>                 bytes, nsec);
>
> --
> 2.34.1
>
>
diff mbox series

Patch

diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
index ad58dad9a580..69751ff5573e 100644
--- a/arch/riscv/crypto/Kconfig
+++ b/arch/riscv/crypto/Kconfig
@@ -29,6 +29,20 @@  config CRYPTO_CHACHA_RISCV64
 	  Architecture: riscv64 using:
 	  - Zvkb vector crypto extension
 
+config CRYPTO_CRCT10DIF_RISCV
+       tristate "Checksum: CRCT10DIF"
+       depends on TOOLCHAIN_HAS_ZBC
+       depends on MMU
+       depends on RISCV_ALTERNATIVE
+       default y
+       help
+         CRCT10DIF checksum with Zbc extension optimized
+	 To accelerate CRCT10DIF checksum, choose Y here.
+
+         Architecture: riscv using:
+          - Zbc extension
+
+
 config CRYPTO_GHASH_RISCV64
 	tristate "Hash functions: GHASH"
 	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
index 247c7bc7288c..6f849f4dc4cc 100644
--- a/arch/riscv/crypto/Makefile
+++ b/arch/riscv/crypto/Makefile
@@ -7,6 +7,9 @@  aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
 obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
 chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
 
+obj-$(CONFIG_CRYPTO_CRCT10DIF_RISCV) += crct10dif-riscv.o
+crct10dif-riscv-y := crct10dif-riscv-zbc.o
+
 obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
 ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
 
@@ -21,3 +24,4 @@  sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o
 
 obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o
 sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o
+
diff --git a/arch/riscv/crypto/crct10dif-riscv-zbc.c b/arch/riscv/crypto/crct10dif-riscv-zbc.c
new file mode 100644
index 000000000000..01571b4286f1
--- /dev/null
+++ b/arch/riscv/crypto/crct10dif-riscv-zbc.c
@@ -0,0 +1,182 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC-T10DIF implementation with RISC-V Zbc extension.
+ *
+ * Copyright (C) 2024 Institute of Software, CAS.
+ */
+
+#include <asm/alternative-macros.h>
+#include <asm/byteorder.h>
+#include <asm/hwcap.h>
+
+#include <crypto/internal/hash.h>
+
+#include <linux/byteorder/generic.h>
+#include <linux/crc-t10dif.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len);
+
+#define CRCT10DIF_POLY 0x8bb7
+
+#if __riscv_xlen == 64
+#define STEP_ORDER 3
+
+#define CRCT10DIF_POLY_QT_BE 0xf65a57f81d33a48a
+
+static inline u64 crct10dif_prep(u16 crc, unsigned long const *ptr)
+{
+	return ((u64)crc << 48) ^ (__force u64)__cpu_to_be64(*ptr);
+}
+
+#elif __riscv_xlen == 32
+#define STEP_ORDER 2
+#define CRCT10DIF_POLY_QT_BE 0xf65a57f8
+
+static inline u32 crct10dif_prep(u16 crc, unsigned long const *ptr)
+{
+	return ((u32)crc << 16) ^ (__force u32)__cpu_to_be32(*ptr);
+}
+
+#else
+#error "Unexpected __riscv_xlen"
+#endif
+
+static inline u16 crct10dif_zbc(unsigned long s)
+{
+	u16 crc;
+
+	asm volatile   (".option push\n"
+			".option arch,+zbc\n"
+			"clmulh %0, %1, %2\n"
+			"xor    %0, %0, %1\n"
+			"clmul  %0, %0, %3\n"
+			".option pop\n"
+			: "=&r" (crc)
+			: "r"(s),
+			  "r"(CRCT10DIF_POLY_QT_BE),
+			  "r"(CRCT10DIF_POLY)
+			:);
+
+	return crc;
+}
+
+#define STEP (1 << STEP_ORDER)
+#define OFFSET_MASK (STEP - 1)
+
+static inline u16 crct10dif_unaligned(u16 crc, unsigned char const *p, size_t len)
+{
+	size_t bits = len * 8;
+	unsigned long s = 0;
+	u16 crc_low = 0;
+
+	for (int i = 0; i < len; i++)
+		s = *p++ | (s << 8);
+
+	if (len < sizeof(u16)) {
+		s ^= crc >> (16 - bits);
+		crc_low = crc << bits;
+	} else {
+		s ^= (unsigned long)crc << (bits - 16);
+	}
+
+	crc = crct10dif_zbc(s);
+	crc ^= crc_low;
+
+	return crc;
+}
+
+static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len)
+{
+	size_t offset, head_len, tail_len;
+	unsigned long const *p_ul;
+	unsigned long s;
+
+	offset = (unsigned long)p & OFFSET_MASK;
+	if (offset && len) {
+		head_len = min(STEP - offset, len);
+		crc = crct10dif_unaligned(crc, p, head_len);
+		p += head_len;
+		len -= head_len;
+	}
+
+	tail_len = len & OFFSET_MASK;
+	len = len >> STEP_ORDER;
+	p_ul = (unsigned long const *)p;
+
+	for (int i = 0; i < len; i++) {
+		s = crct10dif_prep(crc, p_ul);
+		crc = crct10dif_zbc(s);
+		p_ul++;
+	}
+
+	p = (unsigned char const *)p_ul;
+	if (tail_len)
+		crc = crct10dif_unaligned(crc, p, tail_len);
+
+	return crc;
+}
+
+static int crc_t10dif_init(struct shash_desc *desc)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = 0;
+
+	return 0;
+}
+
+static int crc_t10dif_final(struct shash_desc *desc, u8 *out)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*(u16 *)out = *crc;
+
+	return 0;
+}
+
+static int crc_t10dif_update_zbc(struct shash_desc *desc, const u8 *data,
+				unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = crc_t10dif_generic_zbc(*crc, data, length);
+
+	return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
+	.init			= crc_t10dif_init,
+	.update			= crc_t10dif_update_zbc,
+	.final			= crc_t10dif_final,
+	.descsize		= CRC_T10DIF_DIGEST_SIZE,
+
+	.base.cra_name		= "crct10dif",
+	.base.cra_driver_name	= "crct10dif-riscv-zbc",
+	.base.cra_priority	= 150,
+	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+	if (riscv_isa_extension_available(NULL, ZBC))
+		return crypto_register_shash(&crc_t10dif_alg);
+
+	return -ENODEV;
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+	crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_init(crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_DESCRIPTION("CRC-T10DIF using RISC-V ZBC Extension");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_LICENSE("GPL");
diff --git a/lib/crct10diftest.c b/lib/crct10diftest.c
index 9541892eb12c..ac642191e6e8 100644
--- a/lib/crct10diftest.c
+++ b/lib/crct10diftest.c
@@ -666,9 +666,9 @@  static int __init crct10dif_test_init(void)
 
 	local_irq_restore(flags);
 
-	if (errors) 
+	if (errors)
 		pr_warn("crct10dif: %d self tests failed\n", errors);
-	else 
+	else
 		pr_info("crct10dif: self tests passed, processed %d bytes in %lld nsec\n",
 		bytes, nsec);