Message ID | 20180826024006.13800-4-martin.petersen@oracle.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Herbert Xu |
Headers | show |
Series | [1/4] crypto: Introduce notifier for new crypto algorithms | expand |
Hi Martin, On 26 August 2018 at 03:40, Martin K. Petersen <martin.petersen@oracle.com> wrote: > The T10 CRC library function is built into the kernel and therefore > registered early. The hardware-accelerated CRC helpers are typically > loaded as modules and only become available later in the boot > sequence. A separate patch modifies the T10 CRC library to subscribe > to notifications from crypto and permits switching from the > table-based algorithm to a hardware accelerated ditto once the > relevant module is loaded. > > However, since the dependency for "crct10dif" is already satisfied, > nothing is going to cause the hardware-accelerated kernel modules to > get loaded. This is not true. All accelerated implementations based on SIMD polynomial multiplication are tied to the respective CPU feature bits. This applies to x86, power, ARM and arm64. E.g., for x86 you have alias: cpu:type:x86,ven*fam*mod*:feature:*0081* which will be matched by udev if /sys/devices/system/cpu/modalias contains feature 0081, and so the modules will be loaded automatically at boot. > Introduce an init_fn in the integrity profile that can be > called to trigger a load of modules providing the T10 CRC calculation > capability. This function will only get called when a new integrity > profile is registered during device discovery. > > Signed-off-by: Martin K. 
Petersen <martin.petersen@oracle.com> > --- > block/blk-integrity.c | 5 +++++ > block/t10-pi.c | 10 ++++++++++ > include/linux/blkdev.h | 2 ++ > 3 files changed, 17 insertions(+) > > diff --git a/block/blk-integrity.c b/block/blk-integrity.c > index 6121611e1316..5cacae9a2dc2 100644 > --- a/block/blk-integrity.c > +++ b/block/blk-integrity.c > @@ -27,6 +27,7 @@ > #include <linux/scatterlist.h> > #include <linux/export.h> > #include <linux/slab.h> > +#include <linux/module.h> > > #include "blk.h" > > @@ -391,6 +392,7 @@ static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) > > static const struct blk_integrity_profile nop_profile = { > .name = "nop", > + .init_fn = NULL, > .generate_fn = blk_integrity_nop_fn, > .verify_fn = blk_integrity_nop_fn, > }; > @@ -418,6 +420,9 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template > bi->tuple_size = template->tuple_size; > bi->tag_size = template->tag_size; > > + if (bi->profile->init_fn) > + bi->profile->init_fn(); > + > disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; > } > EXPORT_SYMBOL(blk_integrity_register); > diff --git a/block/t10-pi.c b/block/t10-pi.c > index a98db384048f..b83278f9163a 100644 > --- a/block/t10-pi.c > +++ b/block/t10-pi.c > @@ -24,6 +24,7 @@ > #include <linux/t10-pi.h> > #include <linux/blkdev.h> > #include <linux/crc-t10dif.h> > +#include <linux/module.h> > #include <net/checksum.h> > > typedef __be16 (csum_fn) (void *, unsigned int); > @@ -157,8 +158,14 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) > return t10_pi_verify(iter, t10_pi_ip_fn, 3); > } > > +static void t10_pi_crc_init(void) > +{ > + request_module_nowait(CRC_T10DIF_STRING); > +} > + > const struct blk_integrity_profile t10_pi_type1_crc = { > .name = "T10-DIF-TYPE1-CRC", > + .init_fn = t10_pi_crc_init, > .generate_fn = t10_pi_type1_generate_crc, > .verify_fn = t10_pi_type1_verify_crc, > }; > @@ -166,6 +173,7 @@ 
EXPORT_SYMBOL(t10_pi_type1_crc); > > const struct blk_integrity_profile t10_pi_type1_ip = { > .name = "T10-DIF-TYPE1-IP", > + .init_fn = NULL, > .generate_fn = t10_pi_type1_generate_ip, > .verify_fn = t10_pi_type1_verify_ip, > }; > @@ -173,6 +181,7 @@ EXPORT_SYMBOL(t10_pi_type1_ip); > > const struct blk_integrity_profile t10_pi_type3_crc = { > .name = "T10-DIF-TYPE3-CRC", > + .init_fn = t10_pi_crc_init, > .generate_fn = t10_pi_type3_generate_crc, > .verify_fn = t10_pi_type3_verify_crc, > }; > @@ -180,6 +189,7 @@ EXPORT_SYMBOL(t10_pi_type3_crc); > > const struct blk_integrity_profile t10_pi_type3_ip = { > .name = "T10-DIF-TYPE3-IP", > + .init_fn = NULL, > .generate_fn = t10_pi_type3_generate_ip, > .verify_fn = t10_pi_type3_verify_ip, > }; > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index 79226ca8f80f..a43c02e4f43d 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -1806,10 +1806,12 @@ struct blk_integrity_iter { > }; > > typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); > +typedef void (integrity_init_fn) (void); > > struct blk_integrity_profile { > integrity_processing_fn *generate_fn; > integrity_processing_fn *verify_fn; > + integrity_init_fn *init_fn; > const char *name; > }; > > -- > 2.17.1 >
Hi Ard, >> However, since the dependency for "crc10dif" is already satisfied, >> nothing is going to cause the hardware-accelerated kernel modules to >> get loaded. > > This is not true. All accelerated implementations based on SIMD > polynomial multiplication are tried to the respective CPU features > bits. This applies to x86, power, ARM and arm64. > > E.g., for x86 you have > > alias: cpu:type:x86,ven*fam*mod*:feature:*0081* > > which will be matched by udev if /sys/devices/system/cpu/modalias > contains feature 0081, and so the modules will be loaded automatically > at boot. If I can avoid carrying that init callback in the block integrity code that will definitely make me happy. However, loading crct10dif-pclmul does not happen automatically for me. crc-t10dif is linked statically and every user of the CRC goes through that library. So nothing ever requests the "crct10dif" modalias and no accelerator modules are loaded. <fresh boot> # lsmod | grep crc crc32c_intel 24576 0 crc_ccitt 16384 1 ipv6 # modinfo crc32c_intel | grep cpu:type alias: cpu:type:x86,ven*fam*mod*:feature:*0094* # modinfo crct10dif-pclmul | grep cpu:type alias: cpu:type:x86,ven*fam*mod*:feature:*0081* # egrep -o "0081|0094" /sys/devices/system/cpu/modalias 0081 0094 # modprobe crct10dif # lsmod | grep crc crct10dif_pclmul 16384 1 crc32c_intel 24576 0 crc_ccitt 16384 1 ipv6 It's interesting that crc32c_intel is loaded but libcrc32c is not. That matches your description of how things should work. But crct10dif-pclmul isn't loaded and neither is crc32_pclmul: # modprobe crc32 # lsmod | grep crc crc32_generic 16384 0 crc32_pclmul 16384 0 crc32c_intel 24576 0 crc_ccitt 16384 1 ipv6
On 26 August 2018 at 15:30, Martin K. Petersen <martin.petersen@oracle.com> wrote: > > Hi Ard, > >>> However, since the dependency for "crc10dif" is already satisfied, >>> nothing is going to cause the hardware-accelerated kernel modules to >>> get loaded. >> >> This is not true. All accelerated implementations based on SIMD >> polynomial multiplication are tried to the respective CPU features >> bits. This applies to x86, power, ARM and arm64. >> >> E.g., for x86 you have >> >> alias: cpu:type:x86,ven*fam*mod*:feature:*0081* >> >> which will be matched by udev if /sys/devices/system/cpu/modalias >> contains feature 0081, and so the modules will be loaded automatically >> at boot. > > If I can avoid carrying that init callback in the block integrity code > that will definitely make me happy. However, loading crct10dif-pclmul > does not happen automatically for me. crc-t10dif is linked statically > and every user of the CRC goes through that library. So nothing ever > requests the "crct10dif" modalias and no accelerator modules are loaded. > > <fresh boot> > > # lsmod | grep crc > crc32c_intel 24576 0 > crc_ccitt 16384 1 ipv6 > > # modinfo crc32c_intel | grep cpu:type > alias: cpu:type:x86,ven*fam*mod*:feature:*0094* > > # modinfo crct10dif-pclmul | grep cpu:type > alias: cpu:type:x86,ven*fam*mod*:feature:*0081* > > # egrep -o "0081|0094" /sys/devices/system/cpu/modalias > 0081 > 0094 > > # modprobe crct10dif > # lsmod | grep crc > crct10dif_pclmul 16384 1 > crc32c_intel 24576 0 > crc_ccitt 16384 1 ipv6 > > It's interesting that crc32c_intel is loaded but libcrc32c is not. That > matches your description of how things should work. But crct10dif-pclmul > isn't loaded and neither is crc32_pclmul: > > # modprobe crc32 > # lsmod | grep crc > crc32_generic 16384 0 > crc32_pclmul 16384 0 > crc32c_intel 24576 0 > crc_ccitt 16384 1 ipv6 > That is odd. On my Ubuntu system, both crct10dif_pclmul and crc32_pclmul get loaded automatically.
Ard, > That is odd. On my Ubuntu system, both crct10dif_pclmul and > crc32_pclmul get loaded automatically. Just checked my Fedora box and they are loaded there too. Peculiar. I'll keep digging... (crc32c-intel is brought in by dracut on RHEL/OL/Fedora, fwiw)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 6121611e1316..5cacae9a2dc2 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -27,6 +27,7 @@ #include <linux/scatterlist.h> #include <linux/export.h> #include <linux/slab.h> +#include <linux/module.h> #include "blk.h" @@ -391,6 +392,7 @@ static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) static const struct blk_integrity_profile nop_profile = { .name = "nop", + .init_fn = NULL, .generate_fn = blk_integrity_nop_fn, .verify_fn = blk_integrity_nop_fn, }; @@ -418,6 +420,9 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; + if (bi->profile->init_fn) + bi->profile->init_fn(); + disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; } EXPORT_SYMBOL(blk_integrity_register); diff --git a/block/t10-pi.c b/block/t10-pi.c index a98db384048f..b83278f9163a 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -24,6 +24,7 @@ #include <linux/t10-pi.h> #include <linux/blkdev.h> #include <linux/crc-t10dif.h> +#include <linux/module.h> #include <net/checksum.h> typedef __be16 (csum_fn) (void *, unsigned int); @@ -157,8 +158,14 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) return t10_pi_verify(iter, t10_pi_ip_fn, 3); } +static void t10_pi_crc_init(void) +{ + request_module_nowait(CRC_T10DIF_STRING); +} + const struct blk_integrity_profile t10_pi_type1_crc = { .name = "T10-DIF-TYPE1-CRC", + .init_fn = t10_pi_crc_init, .generate_fn = t10_pi_type1_generate_crc, .verify_fn = t10_pi_type1_verify_crc, }; @@ -166,6 +173,7 @@ EXPORT_SYMBOL(t10_pi_type1_crc); const struct blk_integrity_profile t10_pi_type1_ip = { .name = "T10-DIF-TYPE1-IP", + .init_fn = NULL, .generate_fn = t10_pi_type1_generate_ip, .verify_fn = t10_pi_type1_verify_ip, }; @@ -173,6 +181,7 @@ EXPORT_SYMBOL(t10_pi_type1_ip); const struct blk_integrity_profile t10_pi_type3_crc 
= { .name = "T10-DIF-TYPE3-CRC", + .init_fn = t10_pi_crc_init, .generate_fn = t10_pi_type3_generate_crc, .verify_fn = t10_pi_type3_verify_crc, }; @@ -180,6 +189,7 @@ EXPORT_SYMBOL(t10_pi_type3_crc); const struct blk_integrity_profile t10_pi_type3_ip = { .name = "T10-DIF-TYPE3-IP", + .init_fn = NULL, .generate_fn = t10_pi_type3_generate_ip, .verify_fn = t10_pi_type3_verify_ip, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79226ca8f80f..a43c02e4f43d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1806,10 +1806,12 @@ struct blk_integrity_iter { }; typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); +typedef void (integrity_init_fn) (void); struct blk_integrity_profile { integrity_processing_fn *generate_fn; integrity_processing_fn *verify_fn; + integrity_init_fn *init_fn; const char *name; };
The T10 CRC library function is built into the kernel and therefore registered early. The hardware-accelerated CRC helpers are typically loaded as modules and only become available later in the boot sequence. A separate patch modifies the T10 CRC library to subscribe to notifications from crypto and permits switching from the table-based algorithm to a hardware accelerated ditto once the relevant module is loaded. However, since the dependency for "crct10dif" is already satisfied, nothing is going to cause the hardware-accelerated kernel modules to get loaded. Introduce an init_fn in the integrity profile that can be called to trigger a load of modules providing the T10 CRC calculation capability. This function will only get called when a new integrity profile is registered during device discovery. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- block/blk-integrity.c | 5 +++++ block/t10-pi.c | 10 ++++++++++ include/linux/blkdev.h | 2 ++ 3 files changed, 17 insertions(+)