[v4,10/38] arm64: Explicitly save/restore CPACR when probing SVE and SME

Message ID 20231016102501.3643901-11-mark.rutland@arm.com (mailing list archive)
State New, archived
Series [v4,01/38] clocksource/drivers/arm_arch_timer: Initialize evtstrm after finalizing cpucaps

Commit Message

Mark Rutland Oct. 16, 2023, 10:24 a.m. UTC
When a CPU is onlined we first probe for supported features and
properties, and then we subsequently enable features that have been
detected. This is a little problematic for SVE and SME, as some
properties (e.g. vector lengths) cannot be probed while they are
disabled. Due to this, the code probing for SVE properties has to enable
SVE for EL1 prior to probing, and the code probing for SME properties
has to enable SME for EL1 prior to probing. We never disable SVE or SME
for EL1 after probing.
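
To illustrate the problem, a minimal sketch of the pre-patch flow,
using names from the code removed in the diff below: EL1 accesses to
ZCR_EL1 are trapped unless CPACR_EL1.ZEN permits them, so probing has
to enable SVE first, and nothing ever disables it again:

	/*
	 * Sketch only: ZCR_EL1 accesses trap at EL1 unless
	 * CPACR_EL1.ZEN permits them, so probing must run with
	 * SVE enabled; the old code never restored CPACR_EL1.
	 */
	sve_kernel_enable(NULL);			/* sets CPACR_EL1.ZEN */
	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);	/* would trap otherwise */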

It would be a little nicer to transiently enable SVE and SME during
probing, leaving them both disabled unless explicitly enabled, as this
would make it much easier to catch unintentional usage (e.g. when they
are not present system-wide).

This patch reworks the SVE and SME feature probing code to enable
support at EL1 only transiently, disabling it again once probing is
complete.
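
In sketch form, each probe site now brackets its probing with the
save/enable/restore helpers this patch adds, e.g. for SVE:

	unsigned long cpacr = cpacr_save_enable_kernel_sve();

	/* probe while SVE is enabled at EL1 ... */
	vec_init_vq_map(ARM64_VEC_SVE);

	/* put CPACR_EL1 back, leaving SVE disabled if it was before */
	cpacr_restore(cpacr);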

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimd.h | 26 ++++++++++++++++++++++++++
 arch/arm64/kernel/cpufeature.c  | 24 ++++++++++++++++++++++--
 arch/arm64/kernel/fpsimd.c      |  7 ++-----
 3 files changed, 50 insertions(+), 7 deletions(-)

Comments

Mark Brown Oct. 16, 2023, 12:02 p.m. UTC | #1
On Mon, Oct 16, 2023 at 11:24:33AM +0100, Mark Rutland wrote:
> When a CPU is onlined we first probe for supported features and
> properties, and then we subsequently enable features that have been
> detected. This is a little problematic for SVE and SME, as some
> properties (e.g. vector lengths) cannot be probed while they are
> disabled. Due to this, the code probing for SVE properties has to enable

Reviewed-by: Mark Brown <broonie@kernel.org>
Catalin Marinas Oct. 16, 2023, 4:11 p.m. UTC | #2
On Mon, Oct 16, 2023 at 01:02:13PM +0100, Mark Brown wrote:
> On Mon, Oct 16, 2023 at 11:24:33AM +0100, Mark Rutland wrote:
> > When a CPU is onlined we first probe for supported features and
> > properties, and then we subsequently enable features that have been
> > detected. This is a little problematic for SVE and SME, as some
> > properties (e.g. vector lengths) cannot be probed while they are
> > disabled. Due to this, the code probing for SVE properties has to enable
> 
> Reviewed-by: Mark Brown <broonie@kernel.org>

Thanks Mark for reviewing. Could you please also check my conflict
resolution? Mark R's patches conflict with your patches on
for-next/sve-remove-pseudo-regs (maybe I should have applied them on
top; hopefully git rerere remembers it correctly).
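
(For reference, assuming rerere.enabled is set, the recorded
resolution can be sanity-checked before relying on it with:

	git rerere status	# paths with a recorded resolution
	git rerere diff		# diffs for the current state of the resolution
)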

diff --cc arch/arm64/include/asm/fpsimd.h
index 9e5d3a0812b6,c43ae9c013ec..50e5f25d3024
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@@ -123,11 -149,13 +149,12 @@@ extern void sme_save_state(void *state
  extern void sme_load_state(void const *state, int zt);
  
  struct arm64_cpu_capabilities;
- extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
- extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
- extern void sme2_kernel_enable(const struct arm64_cpu_capabilities *__unused);
- extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+ extern void cpu_enable_fpsimd(const struct arm64_cpu_capabilities *__unused);
+ extern void cpu_enable_sve(const struct arm64_cpu_capabilities *__unused);
+ extern void cpu_enable_sme(const struct arm64_cpu_capabilities *__unused);
+ extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
+ extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
  
 -extern u64 read_zcr_features(void);
  extern u64 read_smcr_features(void);
  
  /*
diff --cc arch/arm64/kernel/cpufeature.c
index 55a3bc719d46,ad7ec30d3bd3..397a1bbf4fba
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@@ -1026,21 -1040,30 +1026,26 @@@ void __init init_cpu_features(struct cp
  
  	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
  	    id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
- 		sve_kernel_enable(NULL);
+ 		unsigned long cpacr = cpacr_save_enable_kernel_sve();
+ 
 -		info->reg_zcr = read_zcr_features();
 -		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
  		vec_init_vq_map(ARM64_VEC_SVE);
+ 
+ 		cpacr_restore(cpacr);
  	}
  
  	if (IS_ENABLED(CONFIG_ARM64_SME) &&
  	    id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
- 		sme_kernel_enable(NULL);
+ 		unsigned long cpacr = cpacr_save_enable_kernel_sme();
  
 -		info->reg_smcr = read_smcr_features();
  		/*
  		 * We mask out SMPS since even if the hardware
  		 * supports priorities the kernel does not at present
  		 * and we block access to them.
  		 */
  		info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
 -		init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
  		vec_init_vq_map(ARM64_VEC_SME);
+ 
+ 		cpacr_restore(cpacr);
  	}
  
  	if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
@@@ -1274,29 -1297,40 +1279,35 @@@ void update_cpu_features(int cpu
  	taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu,
  				      info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0);
  
 +	/* Probe vector lengths */
  	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
  	    id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
 -		unsigned long cpacr = cpacr_save_enable_kernel_sve();
 +		if (!system_capabilities_finalized()) {
- 			sve_kernel_enable(NULL);
++			unsigned long cpacr = cpacr_save_enable_kernel_sve();
+ 
 -		info->reg_zcr = read_zcr_features();
 -		taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
 -					info->reg_zcr, boot->reg_zcr);
 -
 -		/* Probe vector lengths */
 -		if (!system_capabilities_finalized())
  			vec_update_vq_map(ARM64_VEC_SVE);
+ 
 -		cpacr_restore(cpacr);
++			cpacr_restore(cpacr);
 +		}
  	}
  
  	if (IS_ENABLED(CONFIG_ARM64_SME) &&
  	    id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
- 		sme_kernel_enable(NULL);
 -		unsigned long cpacr = cpacr_save_enable_kernel_sme();
--
 -		info->reg_smcr = read_smcr_features();
  		/*
  		 * We mask out SMPS since even if the hardware
  		 * supports priorities the kernel does not at present
  		 * and we block access to them.
  		 */
  		info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
 -		taint |= check_update_ftr_reg(SYS_SMCR_EL1, cpu,
 -					info->reg_smcr, boot->reg_smcr);
  
  		/* Probe vector lengths */
--		if (!system_capabilities_finalized())
++		if (!system_capabilities_finalized()) {
++			unsigned long cpacr = cpacr_save_enable_kernel_sme();
++
  			vec_update_vq_map(ARM64_VEC_SME);
+ 
 -		cpacr_restore(cpacr);
++			cpacr_restore(cpacr);
++		}
  	}
  
  	/*
@@@ -3138,7 -3182,15 +3162,9 @@@ static void verify_local_elf_hwcaps(voi
  
  static void verify_sve_features(void)
  {
+ 	unsigned long cpacr = cpacr_save_enable_kernel_sve();
+ 
 -	u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
 -	u64 zcr = read_zcr_features();
 -
 -	unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK;
 -	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
 -
 -	if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) {
 +	if (vec_verify_vq_map(ARM64_VEC_SVE)) {
  		pr_crit("CPU%d: SVE: vector length support mismatch\n",
  			smp_processor_id());
  		cpu_die_early();
@@@ -3147,7 -3201,15 +3175,9 @@@
  
  static void verify_sme_features(void)
  {
+ 	unsigned long cpacr = cpacr_save_enable_kernel_sme();
+ 
 -	u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
 -	u64 smcr = read_smcr_features();
 -
 -	unsigned int safe_len = safe_smcr & SMCR_ELx_LEN_MASK;
 -	unsigned int len = smcr & SMCR_ELx_LEN_MASK;
 -
 -	if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SME)) {
 +	if (vec_verify_vq_map(ARM64_VEC_SME)) {
  		pr_crit("CPU%d: SME: vector length support mismatch\n",
  			smp_processor_id());
  		cpu_die_early();
diff --cc arch/arm64/kernel/fpsimd.c
index 04c801001767,d0d28bc069d2..5ddc246f1482
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@@ -1173,11 -1169,30 +1169,11 @@@ void cpu_enable_sve(const struct arm64_
  void __init sve_setup(void)
  {
  	struct vl_info *info = &vl_info[ARM64_VEC_SVE];
 -	u64 zcr;
  	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
  	unsigned long b;
 +	int max_bit;
  
- 	if (!system_supports_sve())
+ 	if (!cpus_have_cap(ARM64_SVE))
  		return;
  
  	/*
@@@ -1307,9 -1329,29 +1301,9 @@@ void cpu_enable_fa64(const struct arm64
  void __init sme_setup(void)
  {
  	struct vl_info *info = &vl_info[ARM64_VEC_SME];
 -	u64 smcr;
 -	int min_bit;
 +	int min_bit, max_bit;
  
- 	if (!system_supports_sme())
+ 	if (!cpus_have_cap(ARM64_SME))
  		return;
  
  	/*
Mark Brown Oct. 16, 2023, 4:19 p.m. UTC | #3
On Mon, Oct 16, 2023 at 05:11:15PM +0100, Catalin Marinas wrote:

> Thanks Mark for reviewing. Could you please also check my conflict
> resolution? Mark R's patches conflict with your patches on
> for-next/sve-remove-pseudo-regs (maybe I should have applied them on
> top; hopefully git rerere remembers it correctly).

I think that looks fine, assuming I'm reading it correctly; I'll keep
an eye on the CI to make sure.

Patch

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 8df46f186c64b..bd7bea92dae07 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -32,6 +32,32 @@ 
 #define VFP_STATE_SIZE		((32 * 8) + 4)
 #endif
 
+static inline unsigned long cpacr_save_enable_kernel_sve(void)
+{
+	unsigned long old = read_sysreg(cpacr_el1);
+	unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN;
+
+	write_sysreg(old | set, cpacr_el1);
+	isb();
+	return old;
+}
+
+static inline unsigned long cpacr_save_enable_kernel_sme(void)
+{
+	unsigned long old = read_sysreg(cpacr_el1);
+	unsigned long set = CPACR_EL1_FPEN_EL1EN | CPACR_EL1_SMEN_EL1EN;
+
+	write_sysreg(old | set, cpacr_el1);
+	isb();
+	return old;
+}
+
+static inline void cpacr_restore(unsigned long cpacr)
+{
+	write_sysreg(cpacr, cpacr_el1);
+	isb();
+}
+
 /*
  * When we defined the maximum SVE vector length we defined the ABI so
  * that the maximum vector length included all the reserved for future
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5add7d06469d8..e3e0d3d3bd6b7 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1040,13 +1040,19 @@  void __init init_cpu_features(struct cpuinfo_arm64 *info)
 
 	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
 	    id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+		unsigned long cpacr = cpacr_save_enable_kernel_sve();
+
 		info->reg_zcr = read_zcr_features();
 		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
 		vec_init_vq_map(ARM64_VEC_SVE);
+
+		cpacr_restore(cpacr);
 	}
 
 	if (IS_ENABLED(CONFIG_ARM64_SME) &&
 	    id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+		unsigned long cpacr = cpacr_save_enable_kernel_sme();
+
 		info->reg_smcr = read_smcr_features();
 		/*
 		 * We mask out SMPS since even if the hardware
@@ -1056,6 +1062,8 @@  void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS;
 		init_cpu_ftr_reg(SYS_SMCR_EL1, info->reg_smcr);
 		vec_init_vq_map(ARM64_VEC_SME);
+
+		cpacr_restore(cpacr);
 	}
 
 	if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
@@ -1291,6 +1299,8 @@  void update_cpu_features(int cpu,
 
 	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
 	    id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) {
+		unsigned long cpacr = cpacr_save_enable_kernel_sve();
+
 		info->reg_zcr = read_zcr_features();
 		taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu,
 					info->reg_zcr, boot->reg_zcr);
@@ -1298,10 +1308,14 @@  void update_cpu_features(int cpu,
 		/* Probe vector lengths */
 		if (!system_capabilities_finalized())
 			vec_update_vq_map(ARM64_VEC_SVE);
+
+		cpacr_restore(cpacr);
 	}
 
 	if (IS_ENABLED(CONFIG_ARM64_SME) &&
 	    id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) {
+		unsigned long cpacr = cpacr_save_enable_kernel_sme();
+
 		info->reg_smcr = read_smcr_features();
 		/*
 		 * We mask out SMPS since even if the hardware
@@ -1315,6 +1329,8 @@  void update_cpu_features(int cpu,
 		/* Probe vector lengths */
 		if (!system_capabilities_finalized())
 			vec_update_vq_map(ARM64_VEC_SME);
+
+		cpacr_restore(cpacr);
 	}
 
 	/*
@@ -3174,6 +3190,8 @@  static void verify_local_elf_hwcaps(void)
 
 static void verify_sve_features(void)
 {
+	unsigned long cpacr = cpacr_save_enable_kernel_sve();
+
 	u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
 	u64 zcr = read_zcr_features();
 
@@ -3186,11 +3204,13 @@  static void verify_sve_features(void)
 		cpu_die_early();
 	}
 
-	/* Add checks on other ZCR bits here if necessary */
+	cpacr_restore(cpacr);
 }
 
 static void verify_sme_features(void)
 {
+	unsigned long cpacr = cpacr_save_enable_kernel_sme();
+
 	u64 safe_smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
 	u64 smcr = read_smcr_features();
 
@@ -3203,7 +3223,7 @@  static void verify_sme_features(void)
 		cpu_die_early();
 	}
 
-	/* Add checks on other SMCR bits here if necessary */
+	cpacr_restore(cpacr);
 }
 
 static void verify_hyp_capabilities(void)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 91e44ac7150f9..601b973f90ad2 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1174,7 +1174,7 @@  void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
  * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
  * vector length.
  *
- * Use only if SVE is present.
+ * Use only if SVE is present and enabled.
  * This function clobbers the SVE vector length.
  */
 u64 read_zcr_features(void)
@@ -1183,7 +1183,6 @@  u64 read_zcr_features(void)
 	 * Set the maximum possible VL, and write zeroes to all other
 	 * bits to see if they stick.
 	 */
-	sve_kernel_enable(NULL);
 	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
 
 	/* Return LEN value that would be written to get the maximum VL */
@@ -1337,13 +1336,11 @@  void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
  * Read the pseudo-SMCR used by cpufeatures to identify the supported
  * vector length.
  *
- * Use only if SME is present.
+ * Use only if SME is present and enabled.
  * This function clobbers the SME vector length.
  */
 u64 read_smcr_features(void)
 {
-	sme_kernel_enable(NULL);
-
 	/*
 	 * Set the maximum possible VL.
 	 */