diff mbox series

[v7,4/6] arm64: cpufeature: Modify address authentication cpufeature to exact

Message ID 20200909140759.4170-5-amit.kachhap@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: add Armv8.3 pointer authentication enhancements | expand

Commit Message

Amit Daniel Kachhap Sept. 9, 2020, 2:07 p.m. UTC
The current address authentication cpufeature levels are set as LOWER_SAFE
which is not compatible with the different configurations added for Armv8.3
ptrauth enhancements as the different levels have different behaviour and
there is no tunable to enable the lower safe versions. This is rectified
by setting those cpufeature type as EXACT.

The current cpufeature framework also does not interfere in the booting of
non-exact secondary cpus but rather marks them as tainted. As a workaround
this is fixed by replacing the generic match handler with a new handler
specific to ptrauth.

After this change, if there is any variation in ptrauth configurations in
secondary cpus from boot cpu then those mismatched cpus are parked in an
infinite loop.

Following ptrauth crash log is oberserved in Arm fastmodel with mismatched
cpus without this fix,

 CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64ISAR1_EL1. Boot CPU: 0x11111110211402, CPU4: 0x11111110211102
 CPU features: Unsupported CPU feature variation detected.
 GICv3: CPU4: found redistributor 100 region 0:0x000000002f180000
 CPU4: Booted secondary processor 0x0000000100 [0x410fd0f0]
 Unable to handle kernel paging request at virtual address bfff800010dadf3c
 Mem abort info:
   ESR = 0x86000004
   EC = 0x21: IABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
 [bfff800010dadf3c] address between user and kernel address ranges
 Internal error: Oops: 86000004 [#1] PREEMPT SMP
 Modules linked in:
 CPU: 4 PID: 29 Comm: migration/4 Tainted: G S                5.8.0-rc4-00005-ge658591d66d1-dirty #158
 Hardware name: Foundation-v8A (DT)
 pstate: 60000089 (nZCv daIf -PAN -UAO BTYPE=--)
 pc : 0xbfff800010dadf3c
 lr : __schedule+0x2b4/0x5a8
 sp : ffff800012043d70
 x29: ffff800012043d70 x28: 0080000000000000
 x27: ffff800011cbe000 x26: ffff00087ad37580
 x25: ffff00087ad37000 x24: ffff800010de7d50
 x23: ffff800011674018 x22: 0784800010dae2a8
 x21: ffff00087ad37000 x20: ffff00087acb8000
 x19: ffff00087f742100 x18: 0000000000000030
 x17: 0000000000000000 x16: 0000000000000000
 x15: ffff800011ac1000 x14: 00000000000001bd
 x13: 0000000000000000 x12: 0000000000000000
 x11: 0000000000000000 x10: 71519a147ddfeb82
 x9 : 825d5ec0fb246314 x8 : ffff00087ad37dd8
 x7 : 0000000000000000 x6 : 00000000fffedb0e
 x5 : 00000000ffffffff x4 : 0000000000000000
 x3 : 0000000000000028 x2 : ffff80086e11e000
 x1 : ffff00087ad37000 x0 : ffff00087acdc600
 Call trace:
  0xbfff800010dadf3c
  schedule+0x78/0x110
  schedule_preempt_disabled+0x24/0x40
  __kthread_parkme+0x68/0xd0
  kthread+0x138/0x160
  ret_from_fork+0x10/0x34
 Code: bad PC value

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
[Suzuki: Introduce new matching function for address authentication]
Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 46 +++++++++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 9 deletions(-)

Comments

Catalin Marinas Sept. 11, 2020, 5:24 p.m. UTC | #1
On Wed, Sep 09, 2020 at 07:37:57PM +0530, Amit Daniel Kachhap wrote:
> The current address authentication cpufeature levels are set as LOWER_SAFE
> which is not compatible with the different configurations added for Armv8.3
> ptrauth enhancements as the different levels have different behaviour and
> there is no tunable to enable the lower safe versions. This is rectified
> by setting those cpufeature type as EXACT.
> 
> The current cpufeature framework also does not interfere in the booting of
> non-exact secondary cpus but rather marks them as tainted. As a workaround
> this is fixed by replacing the generic match handler with a new handler
> specific to ptrauth.
> 
> After this change, if there is any variation in ptrauth configurations in
> secondary cpus from boot cpu then those mismatched cpus are parked in an
> infinite loop.
> 
> Following ptrauth crash log is oberserved in Arm fastmodel with mismatched
> cpus without this fix,
> 
>  CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64ISAR1_EL1. Boot CPU: 0x11111110211402, CPU4: 0x11111110211102
>  CPU features: Unsupported CPU feature variation detected.
>  GICv3: CPU4: found redistributor 100 region 0:0x000000002f180000
>  CPU4: Booted secondary processor 0x0000000100 [0x410fd0f0]
>  Unable to handle kernel paging request at virtual address bfff800010dadf3c
>  Mem abort info:
>    ESR = 0x86000004
>    EC = 0x21: IABT (current EL), IL = 32 bits
>    SET = 0, FnV = 0
>    EA = 0, S1PTW = 0
>  [bfff800010dadf3c] address between user and kernel address ranges
>  Internal error: Oops: 86000004 [#1] PREEMPT SMP
>  Modules linked in:
>  CPU: 4 PID: 29 Comm: migration/4 Tainted: G S                5.8.0-rc4-00005-ge658591d66d1-dirty #158
>  Hardware name: Foundation-v8A (DT)
>  pstate: 60000089 (nZCv daIf -PAN -UAO BTYPE=--)
>  pc : 0xbfff800010dadf3c
>  lr : __schedule+0x2b4/0x5a8
>  sp : ffff800012043d70
>  x29: ffff800012043d70 x28: 0080000000000000
>  x27: ffff800011cbe000 x26: ffff00087ad37580
>  x25: ffff00087ad37000 x24: ffff800010de7d50
>  x23: ffff800011674018 x22: 0784800010dae2a8
>  x21: ffff00087ad37000 x20: ffff00087acb8000
>  x19: ffff00087f742100 x18: 0000000000000030
>  x17: 0000000000000000 x16: 0000000000000000
>  x15: ffff800011ac1000 x14: 00000000000001bd
>  x13: 0000000000000000 x12: 0000000000000000
>  x11: 0000000000000000 x10: 71519a147ddfeb82
>  x9 : 825d5ec0fb246314 x8 : ffff00087ad37dd8
>  x7 : 0000000000000000 x6 : 00000000fffedb0e
>  x5 : 00000000ffffffff x4 : 0000000000000000
>  x3 : 0000000000000028 x2 : ffff80086e11e000
>  x1 : ffff00087ad37000 x0 : ffff00087acdc600
>  Call trace:
>   0xbfff800010dadf3c
>   schedule+0x78/0x110
>   schedule_preempt_disabled+0x24/0x40
>   __kthread_parkme+0x68/0xd0
>   kthread+0x138/0x160
>   ret_from_fork+0x10/0x34
>  Code: bad PC value

That's what FTR_EXACT gives us. The variation above is in the field at
bit position 8 (API_SHIFT) with the boot CPU value of 4 and the
secondary CPU of 1, if I read it correctly.

Would it be better if the incompatible CPUs are just parked? I'm trying
to figure out from the verify_local_cpu_caps() code whether that's
possible. I don't fully understand why we don't trigger the "Detected
conflict for capability" message instead.

My reading of the code is that we set the system capability based on the
boot CPU since it's a BOOT_CPU_FEATURE. has_address_auth_cpucap() should
return false with this mismatch and verify_local_cpu_caps() would park
the CPU. Once parked, it shouldn't reach the sanity check since
cpuinfo_store_cpu() is called after check_local_cpu_capabilities() (and
the match function).

> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 6424584be01e..4bb3f2b2ffed 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -197,9 +197,9 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
>  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
> -		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
>  	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
> -		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
> +		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),

So we go for FTR_EXACT here with a safe value of 0. It makes sense.
IIUC, in case of a mismatch, we end up with the sanitised register field
as 0.

>  	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
>  	ARM64_FTR_END,
>  };
> @@ -1648,11 +1648,39 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
>  #endif /* CONFIG_ARM64_RAS_EXTN */
>  
>  #ifdef CONFIG_ARM64_PTR_AUTH
> -static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
> -			     int __unused)
> +static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
>  {
> -	return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
> -	       __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF);
> +	int boot_val, sec_val;
> +
> +	/* We don't expect to be called with SCOPE_SYSTEM */
> +	WARN_ON(scope == SCOPE_SYSTEM);
> +	/*
> +	 * The ptr-auth feature levels are not intercompatible with lower
> +	 * levels. Hence we must match ptr-auth feature level of the secondary
> +	 * CPUs with that of the boot CPU. The level of boot cpu is fetched
> +	 * from the sanitised register whereas direct register read is done for
> +	 * the secondary CPUs.
> +	 * The sanitised feature state is guaranteed to match that of the
> +	 * boot CPU as a mismatched secondary CPU is parked before it gets
> +	 * a chance to update the state, with the capability.
> +	 */
> +	boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
> +					       entry->field_pos, entry->sign);
> +	if (scope & SCOPE_BOOT_CPU) {
> +		return boot_val >= entry->min_field_value;
> +	} else if (scope & SCOPE_LOCAL_CPU) {

Nitpick: just drop the else as we had a return already above. We could
also drop the SCOPE_LOCAL_CPU check since that's the only one left.

> +		sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
> +						      entry->field_pos, entry->sign);
> +		return (sec_val >= entry->min_field_value) && (sec_val == boot_val);

Another nitpick: do you still need the sec_val >= ... if you check for
sec_val == boot_val? boot_val was already checked against
min_field_value. That's unless the sanitised reg is updated and boot_val
above is 0 on subsequent CPUs. This could only happen if we don't park a
CPU that has a mismatch and it ends up updating the sysreg. AFAICT, for
a secondary CPU, we first check the features then we update the
sanitised regs.
Amit Daniel Kachhap Sept. 14, 2020, 6:54 a.m. UTC | #2
On 9/11/20 10:54 PM, Catalin Marinas wrote:
> On Wed, Sep 09, 2020 at 07:37:57PM +0530, Amit Daniel Kachhap wrote:
>> The current address authentication cpufeature levels are set as LOWER_SAFE
>> which is not compatible with the different configurations added for Armv8.3
>> ptrauth enhancements as the different levels have different behaviour and
>> there is no tunable to enable the lower safe versions. This is rectified
>> by setting those cpufeature type as EXACT.
>>
>> The current cpufeature framework also does not interfere in the booting of
>> non-exact secondary cpus but rather marks them as tainted. As a workaround
>> this is fixed by replacing the generic match handler with a new handler
>> specific to ptrauth.
>>
>> After this change, if there is any variation in ptrauth configurations in
>> secondary cpus from boot cpu then those mismatched cpus are parked in an
>> infinite loop.
>>
>> Following ptrauth crash log is oberserved in Arm fastmodel with mismatched
>> cpus without this fix,
>>
>>   CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64ISAR1_EL1. Boot CPU: 0x11111110211402, CPU4: 0x11111110211102
>>   CPU features: Unsupported CPU feature variation detected.
>>   GICv3: CPU4: found redistributor 100 region 0:0x000000002f180000
>>   CPU4: Booted secondary processor 0x0000000100 [0x410fd0f0]
>>   Unable to handle kernel paging request at virtual address bfff800010dadf3c
>>   Mem abort info:
>>     ESR = 0x86000004
>>     EC = 0x21: IABT (current EL), IL = 32 bits
>>     SET = 0, FnV = 0
>>     EA = 0, S1PTW = 0
>>   [bfff800010dadf3c] address between user and kernel address ranges
>>   Internal error: Oops: 86000004 [#1] PREEMPT SMP
>>   Modules linked in:
>>   CPU: 4 PID: 29 Comm: migration/4 Tainted: G S                5.8.0-rc4-00005-ge658591d66d1-dirty #158
>>   Hardware name: Foundation-v8A (DT)
>>   pstate: 60000089 (nZCv daIf -PAN -UAO BTYPE=--)
>>   pc : 0xbfff800010dadf3c
>>   lr : __schedule+0x2b4/0x5a8
>>   sp : ffff800012043d70
>>   x29: ffff800012043d70 x28: 0080000000000000
>>   x27: ffff800011cbe000 x26: ffff00087ad37580
>>   x25: ffff00087ad37000 x24: ffff800010de7d50
>>   x23: ffff800011674018 x22: 0784800010dae2a8
>>   x21: ffff00087ad37000 x20: ffff00087acb8000
>>   x19: ffff00087f742100 x18: 0000000000000030
>>   x17: 0000000000000000 x16: 0000000000000000
>>   x15: ffff800011ac1000 x14: 00000000000001bd
>>   x13: 0000000000000000 x12: 0000000000000000
>>   x11: 0000000000000000 x10: 71519a147ddfeb82
>>   x9 : 825d5ec0fb246314 x8 : ffff00087ad37dd8
>>   x7 : 0000000000000000 x6 : 00000000fffedb0e
>>   x5 : 00000000ffffffff x4 : 0000000000000000
>>   x3 : 0000000000000028 x2 : ffff80086e11e000
>>   x1 : ffff00087ad37000 x0 : ffff00087acdc600
>>   Call trace:
>>    0xbfff800010dadf3c
>>    schedule+0x78/0x110
>>    schedule_preempt_disabled+0x24/0x40
>>    __kthread_parkme+0x68/0xd0
>>    kthread+0x138/0x160
>>    ret_from_fork+0x10/0x34
>>   Code: bad PC value
> 
> That's what FTR_EXACT gives us. The variation above is in the field at
> bit position 8 (API_SHIFT) with the boot CPU value of 4 and the
> secondary CPU of 1, if I read it correctly.

Yes
> 
> Would it be better if the incompatible CPUs are just parked? I'm trying
> to figure out from the verify_local_cpu_caps() code whether that's
> possible. I don't fully understand why we don't trigger the "Detected
> conflict for capability" message instead.

The above ptrauth crash appears when this fix patch is not present and
with this fix present, cpu4 is actually parked as,

[    0.098833] CPU features: CPU4: Detected conflict for capability 39 
(Address authentication (IMP DEF algorithm)), System: 1, CPU: 0
[    0.098833] CPU4: will not boot

> 
> My reading of the code is that we set the system capability based on the
> boot CPU since it's a BOOT_CPU_FEATURE. has_address_auth_cpucap() should
> return false with this mismatch and verify_local_cpu_caps() would park
> the CPU. Once parked, it shouldn't reach the sanity check since
> cpuinfo_store_cpu() is called after check_local_cpu_capabilities() (and
> the match function).

Yes.
> 
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index 6424584be01e..4bb3f2b2ffed 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -197,9 +197,9 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
>>   	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
>>   	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
>>   	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
>> -		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
>> +		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
>>   	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
>> -		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
>> +		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
> 
> So we go for FTR_EXACT here with a safe value of 0. It makes sense.
> IIUC, in case of a mismatch, we end up with the sanitised register field
> as 0.
> 
>>   	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
>>   	ARM64_FTR_END,
>>   };
>> @@ -1648,11 +1648,39 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
>>   #endif /* CONFIG_ARM64_RAS_EXTN */
>>   
>>   #ifdef CONFIG_ARM64_PTR_AUTH
>> -static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
>> -			     int __unused)
>> +static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
>>   {
>> -	return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
>> -	       __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF);
>> +	int boot_val, sec_val;
>> +
>> +	/* We don't expect to be called with SCOPE_SYSTEM */
>> +	WARN_ON(scope == SCOPE_SYSTEM);
>> +	/*
>> +	 * The ptr-auth feature levels are not intercompatible with lower
>> +	 * levels. Hence we must match ptr-auth feature level of the secondary
>> +	 * CPUs with that of the boot CPU. The level of boot cpu is fetched
>> +	 * from the sanitised register whereas direct register read is done for
>> +	 * the secondary CPUs.
>> +	 * The sanitised feature state is guaranteed to match that of the
>> +	 * boot CPU as a mismatched secondary CPU is parked before it gets
>> +	 * a chance to update the state, with the capability.
>> +	 */
>> +	boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
>> +					       entry->field_pos, entry->sign);
>> +	if (scope & SCOPE_BOOT_CPU) {
>> +		return boot_val >= entry->min_field_value;
>> +	} else if (scope & SCOPE_LOCAL_CPU) {
> 
> Nitpick: just drop the else as we had a return already above. We could
> also drop the SCOPE_LOCAL_CPU check since that's the only one left.

Ok I will update it in v8 version.
> 
>> +		sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
>> +						      entry->field_pos, entry->sign);
>> +		return (sec_val >= entry->min_field_value) && (sec_val == boot_val);
> 
> Another nitpick: do you still need the sec_val >= ... if you check for
> sec_val == boot_val? boot_val was already checked against
> min_field_value. That's unless the sanitised reg is updated and boot_val
> above is 0 on subsequent CPUs. This could only happen if we don't park a
> CPU that has a mismatch and it ends up updating the sysreg. AFAICT, for
> a secondary CPU, we first check the features then we update the
> sanitised regs.

Yes agreed. I will update it in v8 version.
>
Catalin Marinas Sept. 14, 2020, 10:04 a.m. UTC | #3
On Mon, Sep 14, 2020 at 12:24:23PM +0530, Amit Kachhap wrote:
> On 9/11/20 10:54 PM, Catalin Marinas wrote:
> > On Wed, Sep 09, 2020 at 07:37:57PM +0530, Amit Daniel Kachhap wrote:
> > > The current address authentication cpufeature levels are set as LOWER_SAFE
> > > which is not compatible with the different configurations added for Armv8.3
> > > ptrauth enhancements as the different levels have different behaviour and
> > > there is no tunable to enable the lower safe versions. This is rectified
> > > by setting those cpufeature type as EXACT.
> > > 
> > > The current cpufeature framework also does not interfere in the booting of
> > > non-exact secondary cpus but rather marks them as tainted. As a workaround
> > > this is fixed by replacing the generic match handler with a new handler
> > > specific to ptrauth.
> > > 
> > > After this change, if there is any variation in ptrauth configurations in
> > > secondary cpus from boot cpu then those mismatched cpus are parked in an
> > > infinite loop.
> > > 
> > > Following ptrauth crash log is oberserved in Arm fastmodel with mismatched
> > > cpus without this fix,
> > > 
> > >   CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64ISAR1_EL1. Boot CPU: 0x11111110211402, CPU4: 0x11111110211102
> > >   CPU features: Unsupported CPU feature variation detected.
> > >   GICv3: CPU4: found redistributor 100 region 0:0x000000002f180000
> > >   CPU4: Booted secondary processor 0x0000000100 [0x410fd0f0]
> > >   Unable to handle kernel paging request at virtual address bfff800010dadf3c
> > >   Mem abort info:
> > >     ESR = 0x86000004
> > >     EC = 0x21: IABT (current EL), IL = 32 bits
> > >     SET = 0, FnV = 0
> > >     EA = 0, S1PTW = 0
> > >   [bfff800010dadf3c] address between user and kernel address ranges
> > >   Internal error: Oops: 86000004 [#1] PREEMPT SMP
> > >   Modules linked in:
> > >   CPU: 4 PID: 29 Comm: migration/4 Tainted: G S                5.8.0-rc4-00005-ge658591d66d1-dirty #158
> > >   Hardware name: Foundation-v8A (DT)
> > >   pstate: 60000089 (nZCv daIf -PAN -UAO BTYPE=--)
> > >   pc : 0xbfff800010dadf3c
> > >   lr : __schedule+0x2b4/0x5a8
> > >   sp : ffff800012043d70
> > >   x29: ffff800012043d70 x28: 0080000000000000
> > >   x27: ffff800011cbe000 x26: ffff00087ad37580
> > >   x25: ffff00087ad37000 x24: ffff800010de7d50
> > >   x23: ffff800011674018 x22: 0784800010dae2a8
> > >   x21: ffff00087ad37000 x20: ffff00087acb8000
> > >   x19: ffff00087f742100 x18: 0000000000000030
> > >   x17: 0000000000000000 x16: 0000000000000000
> > >   x15: ffff800011ac1000 x14: 00000000000001bd
> > >   x13: 0000000000000000 x12: 0000000000000000
> > >   x11: 0000000000000000 x10: 71519a147ddfeb82
> > >   x9 : 825d5ec0fb246314 x8 : ffff00087ad37dd8
> > >   x7 : 0000000000000000 x6 : 00000000fffedb0e
> > >   x5 : 00000000ffffffff x4 : 0000000000000000
> > >   x3 : 0000000000000028 x2 : ffff80086e11e000
> > >   x1 : ffff00087ad37000 x0 : ffff00087acdc600
> > >   Call trace:
> > >    0xbfff800010dadf3c
> > >    schedule+0x78/0x110
> > >    schedule_preempt_disabled+0x24/0x40
> > >    __kthread_parkme+0x68/0xd0
> > >    kthread+0x138/0x160
> > >    ret_from_fork+0x10/0x34
> > >   Code: bad PC value
> > 
> > That's what FTR_EXACT gives us. The variation above is in the field at
> > bit position 8 (API_SHIFT) with the boot CPU value of 4 and the
> > secondary CPU of 1, if I read it correctly.
> 
> Yes
> 
> > Would it be better if the incompatible CPUs are just parked? I'm trying
> > to figure out from the verify_local_cpu_caps() code whether that's
> > possible. I don't fully understand why we don't trigger the "Detected
> > conflict for capability" message instead.
> 
> The above ptrauth crash appears when this fix patch is not present and
> with this fix present, cpu4 is actually parked as,
> 
> [    0.098833] CPU features: CPU4: Detected conflict for capability 39
> (Address authentication (IMP DEF algorithm)), System: 1, CPU: 0
> [    0.098833] CPU4: will not boot

Ah, should have read the commit log properly. Thanks for the
clarification.
diff mbox series

Patch

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6424584be01e..4bb3f2b2ffed 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -197,9 +197,9 @@  static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
+		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
+		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -1648,11 +1648,39 @@  static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
 #endif /* CONFIG_ARM64_RAS_EXTN */
 
 #ifdef CONFIG_ARM64_PTR_AUTH
-static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
-			     int __unused)
+static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
 {
-	return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
-	       __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF);
+	int boot_val, sec_val;
+
+	/* We don't expect to be called with SCOPE_SYSTEM */
+	WARN_ON(scope == SCOPE_SYSTEM);
+	/*
+	 * The ptr-auth feature levels are not intercompatible with lower
+	 * levels. Hence we must match ptr-auth feature level of the secondary
+	 * CPUs with that of the boot CPU. The level of boot cpu is fetched
+	 * from the sanitised register whereas direct register read is done for
+	 * the secondary CPUs.
+	 * The sanitised feature state is guaranteed to match that of the
+	 * boot CPU as a mismatched secondary CPU is parked before it gets
+	 * a chance to update the state, with the capability.
+	 */
+	boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
+					       entry->field_pos, entry->sign);
+	if (scope & SCOPE_BOOT_CPU) {
+		return boot_val >= entry->min_field_value;
+	} else if (scope & SCOPE_LOCAL_CPU) {
+		sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
+						      entry->field_pos, entry->sign);
+		return (sec_val >= entry->min_field_value) && (sec_val == boot_val);
+	}
+	return false;
+}
+
+static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
+				     int scope)
+{
+	return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
+	       has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
 }
 
 static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
@@ -2021,7 +2049,7 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_APA_SHIFT,
 		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
-		.matches = has_cpuid_feature,
+		.matches = has_address_auth_cpucap,
 	},
 	{
 		.desc = "Address authentication (IMP DEF algorithm)",
@@ -2031,12 +2059,12 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_API_SHIFT,
 		.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
-		.matches = has_cpuid_feature,
+		.matches = has_address_auth_cpucap,
 	},
 	{
 		.capability = ARM64_HAS_ADDRESS_AUTH,
 		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
-		.matches = has_address_auth,
+		.matches = has_address_auth_metacap,
 	},
 	{
 		.desc = "Generic authentication (architected algorithm)",