Message ID | 20241113104036.254491-3-zhihang.shao.iscas@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | RISC-V CRC-T10DIF optimization with zbc extension and CRC-T10DIF selftest | expand |
Context | Check | Description |
---|---|---|
conchuod/vmtest-fixes-PR | fail | merge-conflict |
On Wed, 13 Nov 2024 at 19:06, Zhihang Shao <zhihang.shao.iscas@gmail.com> wrote: > > The current CRC-T10DIF algorithm is based on table-lookup optimization. > Given the previous work on optimizing crc32 calculations with zbc > extension, it is believed that this will be equally effective for > accelerating crc-t10dif. > Therefore, this patch offers a implementation of crc-t10dif using zbc > extension. This can detect whether the current runtime environment > supports zbc feature and, if so, uses it to accelerate crc-t10dif > calculations. > > This patch is tested on QEMU VM with the kernel CRC-T10DIF selftests. > > Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com> > --- > arch/riscv/crypto/Kconfig | 14 ++ > arch/riscv/crypto/Makefile | 4 + > arch/riscv/crypto/crct10dif-riscv-zbc.c | 182 ++++++++++++++++++++++++ > lib/crct10diftest.c | 4 +- > 4 files changed, 202 insertions(+), 2 deletions(-) > create mode 100644 arch/riscv/crypto/crct10dif-riscv-zbc.c > > diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig > index ad58dad9a580..69751ff5573e 100644 > --- a/arch/riscv/crypto/Kconfig > +++ b/arch/riscv/crypto/Kconfig > @@ -29,6 +29,20 @@ config CRYPTO_CHACHA_RISCV64 > Architecture: riscv64 using: > - Zvkb vector crypto extension > > +config CRYPTO_CRCT10DIF_RISCV > + tristate "Checksum: CRCT10DIF" > + depends on TOOLCHAIN_HAS_ZBC > + depends on MMU > + depends on RISCV_ALTERNATIVE > + default y > + help > + CRCT10DIF checksum with Zbc extension optimized > + To accelerate CRCT10DIF checksum, choose Y here. This line should be aligned with two characters. > + > + Architecture: riscv using: > + - Zbc extension Same here. > + > + More than one redundant empty line should be removed. 
> config CRYPTO_GHASH_RISCV64 > tristate "Hash functions: GHASH" > depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO > diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile > index 247c7bc7288c..6f849f4dc4cc 100644 > --- a/arch/riscv/crypto/Makefile > +++ b/arch/riscv/crypto/Makefile > @@ -7,6 +7,9 @@ aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \ > obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o > chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o > > +obj-$(CONFIG_CRYPTO_CRCT10DIF_RISCV) += crct10dif-riscv.o > +crct10dif-riscv-y := crct10dif-riscv-zbc.o > + > obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o > ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o > > @@ -21,3 +24,4 @@ sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o > > obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o > sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o > + > diff --git a/arch/riscv/crypto/crct10dif-riscv-zbc.c b/arch/riscv/crypto/crct10dif-riscv-zbc.c > new file mode 100644 > index 000000000000..01571b4286f1 > --- /dev/null > +++ b/arch/riscv/crypto/crct10dif-riscv-zbc.c > @@ -0,0 +1,182 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Accelerated CRC-T10DIF implementation with RISC-V Zbc extension. > + * > + * Copyright (C) 2024 Institute of Software, CAS. 
> + */ > + > +#include <asm/alternative-macros.h> > +#include <asm/byteorder.h> > +#include <asm/hwcap.h> > + > +#include <crypto/internal/hash.h> > + > +#include <linux/byteorder/generic.h> > +#include <linux/crc-t10dif.h> > +#include <linux/minmax.h> > +#include <linux/module.h> > +#include <linux/types.h> > + > +static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len); > + > +#define CRCT10DIF_POLY 0x8bb7 > + > +#if __riscv_xlen == 64 > +#define STEP_ORDER 3 > + > +#define CRCT10DIF_POLY_QT_BE 0xf65a57f81d33a48a > + > +static inline u64 crct10dif_prep(u16 crc, unsigned long const *ptr) > +{ > + return ((u64)crc << 48) ^ (__force u64)__cpu_to_be64(*ptr); > +} > + > +#elif __riscv_xlen == 32 > +#define STEP_ORDER 2 > +#define CRCT10DIF_POLY_QT_BE 0xf65a57f8 > + > +static inline u32 crct10dif_prep(u16 crc, unsigned long const *ptr) > +{ > + return ((u32)crc << 16) ^ (__force u32)__cpu_to_be32(*ptr); > +} > + > +#else > +#error "Unexpected __riscv_xlen" > +#endif > + > +static inline u16 crct10dif_zbc(unsigned long s) > +{ > + u16 crc; > + > + asm volatile (".option push\n" > + ".option arch,+zbc\n" > + "clmulh %0, %1, %2\n" > + "xor %0, %0, %1\n" > + "clmul %0, %0, %3\n" > + ".option pop\n" > + : "=&r" (crc) > + : "r"(s), > + "r"(CRCT10DIF_POLY_QT_BE), > + "r"(CRCT10DIF_POLY) > + :); > + > + return crc; > +} > + > +#define STEP (1 << STEP_ORDER) > +#define OFFSET_MASK (STEP - 1) > + > +static inline u16 crct10dif_unaligned(u16 crc, unsigned char const *p, size_t len) > +{ > + size_t bits = len * 8; > + unsigned long s = 0; > + u16 crc_low = 0; > + > + for (int i = 0; i < len; i++) > + s = *p++ | (s << 8); > + > + if (len < sizeof(u16)) { > + s ^= crc >> (16 - bits); > + crc_low = crc << bits; > + } else { > + s ^= (unsigned long)crc << (bits - 16); > + } > + > + crc = crct10dif_zbc(s); > + crc ^= crc_low; > + > + return crc; > +} > + > +static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len) > +{ > + size_t offset, 
head_len, tail_len; > + unsigned long const *p_ul; > + unsigned long s; > + > + offset = (unsigned long)p & OFFSET_MASK; > + if (offset && len) { > + head_len = min(STEP - offset, len); > + crc = crct10dif_unaligned(crc, p, head_len); > + p += head_len; > + len -= head_len; > + } > + > + tail_len = len & OFFSET_MASK; > + len = len >> STEP_ORDER; > + p_ul = (unsigned long const *)p; > + > + for (int i = 0; i < len; i++) { > + s = crct10dif_prep(crc, p_ul); > + crc = crct10dif_zbc(s); > + p_ul++; > + } > + > + p = (unsigned char const *)p_ul; > + if (tail_len) > + crc = crct10dif_unaligned(crc, p, tail_len); > + > + return crc; > +} > + > +static int crc_t10dif_init(struct shash_desc *desc) > +{ > + u16 *crc = shash_desc_ctx(desc); > + > + *crc = 0; > + > + return 0; > +} > + > +static int crc_t10dif_final(struct shash_desc *desc, u8 *out) > +{ > + u16 *crc = shash_desc_ctx(desc); > + > + *(u16 *)out = *crc; > + > + return 0; > +} > + > +static int crc_t10dif_update_zbc(struct shash_desc *desc, const u8 *data, > + unsigned int length) > +{ > + u16 *crc = shash_desc_ctx(desc); > + > + *crc = crc_t10dif_generic_zbc(*crc, data, length); > + > + return 0; > +} > + > +static struct shash_alg crc_t10dif_alg = { > + .digestsize = CRC_T10DIF_DIGEST_SIZE, > + .init = crc_t10dif_init, > + .update = crc_t10dif_update_zbc, > + .final = crc_t10dif_final, > + .descsize = CRC_T10DIF_DIGEST_SIZE, > + > + .base.cra_name = "crct10dif", > + .base.cra_driver_name = "crct10dif-riscv-zbc", > + .base.cra_priority = 150, > + .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, > + .base.cra_module = THIS_MODULE, > +}; > + > +static int __init crc_t10dif_mod_init(void) > +{ > + if (riscv_isa_extension_available(NULL, ZBC)) > + return crypto_register_shash(&crc_t10dif_alg); > + > + return -ENODEV; > +} > + > +static void __exit crc_t10dif_mod_exit(void) > +{ > + crypto_unregister_shash(&crc_t10dif_alg); > +} > + > +module_init(crc_t10dif_mod_init); > +module_exit(crc_t10dif_mod_exit); > + > 
+MODULE_DESCRIPTION("CRC-T10DIF using RISC-V ZBC Extension"); > +MODULE_ALIAS_CRYPTO("crct10dif"); > +MODULE_LICENSE("GPL"); > diff --git a/lib/crct10diftest.c b/lib/crct10diftest.c > index 9541892eb12c..ac642191e6e8 100644 > --- a/lib/crct10diftest.c > +++ b/lib/crct10diftest.c > @@ -666,9 +666,9 @@ static int __init crct10dif_test_init(void) > > local_irq_restore(flags); > > - if (errors) > + if (errors) This change should be removed from this patch. > pr_warn("crct10dif: %d self tests failed\n", errors); > - else > + else Same here. > pr_info("crct10dif: self tests passed, processed %d bytes in %lld nsec\n", > bytes, nsec); > > -- > 2.34.1 > >
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig index ad58dad9a580..69751ff5573e 100644 --- a/arch/riscv/crypto/Kconfig +++ b/arch/riscv/crypto/Kconfig @@ -29,6 +29,20 @@ config CRYPTO_CHACHA_RISCV64 Architecture: riscv64 using: - Zvkb vector crypto extension +config CRYPTO_CRCT10DIF_RISCV + tristate "Checksum: CRCT10DIF" + depends on TOOLCHAIN_HAS_ZBC + depends on MMU + depends on RISCV_ALTERNATIVE + default y + help + CRCT10DIF checksum with Zbc extension optimized + To accelerate CRCT10DIF checksum, choose Y here. + + Architecture: riscv using: + - Zbc extension + + config CRYPTO_GHASH_RISCV64 tristate "Hash functions: GHASH" depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile index 247c7bc7288c..6f849f4dc4cc 100644 --- a/arch/riscv/crypto/Makefile +++ b/arch/riscv/crypto/Makefile @@ -7,6 +7,9 @@ aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \ obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_RISCV) += crct10dif-riscv.o +crct10dif-riscv-y := crct10dif-riscv-zbc.o + obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o @@ -21,3 +24,4 @@ sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o + diff --git a/arch/riscv/crypto/crct10dif-riscv-zbc.c b/arch/riscv/crypto/crct10dif-riscv-zbc.c new file mode 100644 index 000000000000..01571b4286f1 --- /dev/null +++ b/arch/riscv/crypto/crct10dif-riscv-zbc.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC-T10DIF implementation with RISC-V Zbc extension. + * + * Copyright (C) 2024 Institute of Software, CAS. 
+ */ + +#include <asm/alternative-macros.h> +#include <asm/byteorder.h> +#include <asm/hwcap.h> + +#include <crypto/internal/hash.h> + +#include <linux/byteorder/generic.h> +#include <linux/crc-t10dif.h> +#include <linux/minmax.h> +#include <linux/module.h> +#include <linux/types.h> + +static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len); + +#define CRCT10DIF_POLY 0x8bb7 + +#if __riscv_xlen == 64 +#define STEP_ORDER 3 + +#define CRCT10DIF_POLY_QT_BE 0xf65a57f81d33a48a + +static inline u64 crct10dif_prep(u16 crc, unsigned long const *ptr) +{ + return ((u64)crc << 48) ^ (__force u64)__cpu_to_be64(*ptr); +} + +#elif __riscv_xlen == 32 +#define STEP_ORDER 2 +#define CRCT10DIF_POLY_QT_BE 0xf65a57f8 + +static inline u32 crct10dif_prep(u16 crc, unsigned long const *ptr) +{ + return ((u32)crc << 16) ^ (__force u32)__cpu_to_be32(*ptr); +} + +#else +#error "Unexpected __riscv_xlen" +#endif + +static inline u16 crct10dif_zbc(unsigned long s) +{ + u16 crc; + + asm volatile (".option push\n" + ".option arch,+zbc\n" + "clmulh %0, %1, %2\n" + "xor %0, %0, %1\n" + "clmul %0, %0, %3\n" + ".option pop\n" + : "=&r" (crc) + : "r"(s), + "r"(CRCT10DIF_POLY_QT_BE), + "r"(CRCT10DIF_POLY) + :); + + return crc; +} + +#define STEP (1 << STEP_ORDER) +#define OFFSET_MASK (STEP - 1) + +static inline u16 crct10dif_unaligned(u16 crc, unsigned char const *p, size_t len) +{ + size_t bits = len * 8; + unsigned long s = 0; + u16 crc_low = 0; + + for (int i = 0; i < len; i++) + s = *p++ | (s << 8); + + if (len < sizeof(u16)) { + s ^= crc >> (16 - bits); + crc_low = crc << bits; + } else { + s ^= (unsigned long)crc << (bits - 16); + } + + crc = crct10dif_zbc(s); + crc ^= crc_low; + + return crc; +} + +static u16 crc_t10dif_generic_zbc(u16 crc, unsigned char const *p, size_t len) +{ + size_t offset, head_len, tail_len; + unsigned long const *p_ul; + unsigned long s; + + offset = (unsigned long)p & OFFSET_MASK; + if (offset && len) { + head_len = min(STEP - offset, len); + crc = 
crct10dif_unaligned(crc, p, head_len); + p += head_len; + len -= head_len; + } + + tail_len = len & OFFSET_MASK; + len = len >> STEP_ORDER; + p_ul = (unsigned long const *)p; + + for (int i = 0; i < len; i++) { + s = crct10dif_prep(crc, p_ul); + crc = crct10dif_zbc(s); + p_ul++; + } + + p = (unsigned char const *)p_ul; + if (tail_len) + crc = crct10dif_unaligned(crc, p, tail_len); + + return crc; +} + +static int crc_t10dif_init(struct shash_desc *desc) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = 0; + + return 0; +} + +static int crc_t10dif_final(struct shash_desc *desc, u8 *out) +{ + u16 *crc = shash_desc_ctx(desc); + + *(u16 *)out = *crc; + + return 0; +} + +static int crc_t10dif_update_zbc(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = crc_t10dif_generic_zbc(*crc, data, length); + + return 0; +} + +static struct shash_alg crc_t10dif_alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = crc_t10dif_init, + .update = crc_t10dif_update_zbc, + .final = crc_t10dif_final, + .descsize = CRC_T10DIF_DIGEST_SIZE, + + .base.cra_name = "crct10dif", + .base.cra_driver_name = "crct10dif-riscv-zbc", + .base.cra_priority = 150, + .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init crc_t10dif_mod_init(void) +{ + if (riscv_isa_extension_available(NULL, ZBC)) + return crypto_register_shash(&crc_t10dif_alg); + + return -ENODEV; +} + +static void __exit crc_t10dif_mod_exit(void) +{ + crypto_unregister_shash(&crc_t10dif_alg); +} + +module_init(crc_t10dif_mod_init); +module_exit(crc_t10dif_mod_exit); + +MODULE_DESCRIPTION("CRC-T10DIF using RISC-V ZBC Extension"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_LICENSE("GPL"); diff --git a/lib/crct10diftest.c b/lib/crct10diftest.c index 9541892eb12c..ac642191e6e8 100644 --- a/lib/crct10diftest.c +++ b/lib/crct10diftest.c @@ -666,9 +666,9 @@ static int __init crct10dif_test_init(void) local_irq_restore(flags); - if 
(errors) + if (errors) pr_warn("crct10dif: %d self tests failed\n", errors); - else + else pr_info("crct10dif: self tests passed, processed %d bytes in %lld nsec\n", bytes, nsec);
The current CRC-T10DIF algorithm is based on table-lookup optimization. Given the previous work on optimizing crc32 calculations with the zbc extension, it is believed that this will be equally effective for accelerating crc-t10dif. Therefore, this patch offers an implementation of crc-t10dif using the zbc extension. This can detect whether the current runtime environment supports the zbc feature and, if so, uses it to accelerate crc-t10dif calculations. This patch is tested on QEMU VM with the kernel CRC-T10DIF selftests. Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com> --- arch/riscv/crypto/Kconfig | 14 ++ arch/riscv/crypto/Makefile | 4 + arch/riscv/crypto/crct10dif-riscv-zbc.c | 182 ++++++++++++++++++++++++ lib/crct10diftest.c | 4 +- 4 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/crypto/crct10dif-riscv-zbc.c