Message ID | 20250210174540.161705-2-ebiggers@kernel.org
---|---
State | New
Series | x86 CRC optimizations
On Mon, Feb 10, 2025 at 09:45:35AM -0800, Eric Biggers wrote:
> @@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
>  				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
>  				      aes_gcm_simdalgs_vaes_avx10_256);
>  	if (err)
>  		return err;
>  
> -	if (x86_match_cpu(zmm_exclusion_list)) {
> +	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {

s/boot_cpu_has/cpu_feature_enabled/
On Mon, Feb 10, 2025 at 09:40:30PM +0100, Borislav Petkov wrote:
> On Mon, Feb 10, 2025 at 09:45:35AM -0800, Eric Biggers wrote:
> > @@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
> >  				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
> >  				      aes_gcm_simdalgs_vaes_avx10_256);
> >  	if (err)
> >  		return err;
> >  
> > -	if (x86_match_cpu(zmm_exclusion_list)) {
> > +	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
> 
> s/boot_cpu_has/cpu_feature_enabled/

$ git grep boot_cpu_has arch/x86/crypto | wc -l
87
$ git grep cpu_feature_enabled arch/x86/crypto | wc -l
0

It wouldn't make sense to change just this one.  Should they really all be
changed?

I see that cpu_feature_enabled() uses code patching while boot_cpu_has() does
not.  All these checks occur once at module load time, though, so code patching
wouldn't be beneficial.

- Eric
On Mon, Feb 10, 2025 at 01:01:03PM -0800, Eric Biggers wrote:
> I see that cpu_feature_enabled() uses code patching while boot_cpu_has() does
> not.  All these checks occur once at module load time, though, so code patching
> wouldn't be beneficial.

We want to convert all code to use a single interface for testing CPU features
- cpu_feature_enabled() - and the implementation shouldn't be important to
users - it should just work.

Since you're adding new code, you might as well use the proper interface.  As
to converting crypto/ and the rest of the tree, that should happen at some
point... eventually...

Thx.
On Mon, Feb 10, 2025 at 10:17:10PM +0100, Borislav Petkov wrote:
> On Mon, Feb 10, 2025 at 01:01:03PM -0800, Eric Biggers wrote:
> > I see that cpu_feature_enabled() uses code patching while boot_cpu_has() does
> > not.  All these checks occur once at module load time, though, so code patching
> > wouldn't be beneficial.
> 
> We want to convert all code to use a single interface for testing CPU features
> - cpu_feature_enabled() - and the implementation shouldn't be important to
> users - it should just work.
> 
> Since you're adding new code, you might as well use the proper interface.  As
> to converting crypto/ and the rest of the tree, that should happen at some
> point... eventually...

Well, it's new code in a function that already has a bunch of boot_cpu_has()
checks.  I don't really like leaving around random inconsistencies.  If there
is a new way to do it, we should just update it everywhere.

I'll also note that boot_cpu_has() is missing a comment that says it is
deprecated (if it really is).

- Eric
On Mon, Feb 10, 2025 at 01:37:05PM -0800, Eric Biggers wrote:
> I'll also note that boot_cpu_has() is missing a comment that says it is
> deprecated (if it really is).

/*
 * This is the default CPU features testing macro to use in code.
               ^^^^^^^^^^^^^
 *
 * It is for detection of features which need kernel infrastructure to be
 * used.  It may *not* directly test the CPU itself.  Use the cpu_has() family
 * if you want true runtime testing of CPU features, like in hypervisor code
 * where you are supporting a possible guest feature where host support for it
 * is not relevant.
 */
#define cpu_feature_enabled(bit)	\
	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))

#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)

---

Forget what I said - we'll convert everything when the time comes.
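For readers unfamiliar with the two macros, here is a minimal, hypothetical
sketch (not part of this series; cpufeat_demo_init() is an invented name) of
the kind of one-time check under discussion, written with cpu_feature_enabled():

#include <linux/module.h>
#include <asm/cpufeature.h>

/*
 * Hypothetical example (not from this series): a CPU feature check that runs
 * exactly once at module init.  cpu_feature_enabled() compiles down to a
 * patched static branch, but on a one-shot path like this the practical
 * difference from boot_cpu_has() is nil; the argument for it is having a
 * single interface everywhere.
 */
static int __init cpufeat_demo_init(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_AVX))
		return -ENODEV;	/* no AVX: skip registering AVX algorithms */

	pr_info("cpufeat_demo: AVX present, AVX-optimized code would be registered here\n");
	return 0;
}
module_init(cpufeat_demo_init);

MODULE_LICENSE("GPL");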
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 11e95fc62636e..3e9ab5cdade47 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1534,30 +1534,10 @@ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
 DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
 		"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
 		AES_GCM_KEY_AVX10_SIZE, 800);
 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
 
-/*
- * This is a list of CPU models that are known to suffer from downclocking when
- * zmm registers (512-bit vectors) are used.  On these CPUs, the AES mode
- * implementations with zmm registers won't be used by default.  Implementations
- * with ymm registers (256-bit vectors) will be used by default instead.
- */
-static const struct x86_cpu_id zmm_exclusion_list[] = {
-	X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
-	X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
-	X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
-	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
-	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
-	{},
-};
-
 static int __init register_avx_algs(void)
 {
 	int err;
 
 	if (!boot_cpu_has(X86_FEATURE_AVX))
@@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
 				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
 				      aes_gcm_simdalgs_vaes_avx10_256);
 	if (err)
 		return err;
 
-	if (x86_match_cpu(zmm_exclusion_list)) {
+	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
 		int i;
 
 		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
 		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
 			aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 508c0dad116bc..99334026a26c9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -481,10 +481,11 @@
 #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* BHI_DIS_S HW control enabled */
 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
 #define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
 #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
 #define X86_FEATURE_AMD_WORKLOAD_CLASS	(21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM		(21*32 + 8) /* Avoid ZMM registers due to downclocking */
 
 /*
  * BUG word(s)
  */
 #define X86_BUG(x)		(NCAPINTS*32 + (x))
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3dce22f00dc34..c3005c4ec46a9 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -519,10 +519,29 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
 	msr = this_cpu_read(msr_misc_features_shadow);
 	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
 }
 
+/*
+ * This is a list of Intel CPUs that are known to suffer from downclocking when
+ * ZMM registers (512-bit vectors) are used.  On these CPUs, when the kernel
+ * executes SIMD-optimized code such as cryptography functions or CRCs, it
+ * should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
+ */
+static const struct x86_cpu_id zmm_exclusion_list[] = {
+	X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
+	X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
+	X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
+	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
+	{},
+};
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
 	early_init_intel(c);
 
 	intel_workarounds(c);
@@ -599,10 +618,13 @@ static void init_intel(struct cpuinfo_x86 *c)
 		if (p)
 			strcpy(c->x86_model_id, p);
 	}
 #endif
 
+	if (x86_match_cpu(zmm_exclusion_list))
+		set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
+
 	/* Work around errata */
 	srat_detect_node(c);
 
 	init_ia32_feat_ctl(c);
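As a rough illustration of what the new flag enables (hypothetical code, not
part of this patch; simd_width_demo_init() and use_zmm are invented names),
any SIMD glue code - such as the CRC code elsewhere in this series - can test
X86_FEATURE_PREFER_YMM at init time instead of duplicating the CPU model list:

#include <linux/cache.h>
#include <linux/module.h>
#include <asm/cpufeature.h>

/*
 * Hypothetical example (not from this patch): once init_intel() sets the
 * synthetic X86_FEATURE_PREFER_YMM flag, any SIMD glue code (AES, CRC, ...)
 * can consult it at init time instead of carrying its own copy of the CPU
 * model exclusion list.
 */
static bool use_zmm __ro_after_init;

static int __init simd_width_demo_init(void)
{
	/* Use 512-bit vectors only on AVX-512 CPUs not flagged to prefer YMM. */
	use_zmm = boot_cpu_has(X86_FEATURE_AVX512VL) &&
		  !boot_cpu_has(X86_FEATURE_PREFER_YMM);

	pr_info("simd_width_demo: 512-bit vector paths %s\n",
		use_zmm ? "enabled" : "disabled");
	return 0;
}
module_init(simd_width_demo_init);

MODULE_LICENSE("GPL");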