
arm64: Add ARM64_HAS_LSE2 CPU capability

Message ID 20240906090812.249473-1-tiantao6@hisilicon.com (mailing list archive)
State New, archived
Series arm64: Add ARM64_HAS_LSE2 CPU capability

Commit Message

tiantao (H) Sept. 6, 2024, 9:08 a.m. UTC
When FEAT_LSE2 is implemented, bit 6 of SCTLR_ELx is nAA (Not-aligned
Access). The nAA bit has two values:
0b0: Unaligned accesses by the specified instructions generate an
Alignment fault.
0b1: Unaligned accesses by the specified instructions do not generate
an Alignment fault.

This patch sets the nAA bit to 1, so the following instructions will not
generate an Alignment fault even when the bytes being accessed are not
all within a single 16-byte quantity:
• LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH, LDLAR,
  LDLARH
• STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
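
For illustration, a minimal userspace sketch (not part of the patch itself)
of the kind of access this controls. GCC/Clang typically compile the release
store below to STLR, one of the instructions listed above; the +9 offset is
chosen so that the 8-byte access straddles a 16-byte boundary:

  /* Illustrative only: an 8-byte release store whose bytes straddle a
   * 16-byte boundary. On FEAT_LSE2 hardware this faults with
   * SCTLR_ELx.nAA == 0 and does not fault with nAA == 1, but in neither
   * case is the store single-copy atomic. */
  #include <stdint.h>

  int main(void)
  {
      _Alignas(16) char buf[32] = { 0 };
      uint64_t *p = (uint64_t *)(buf + 9);    /* covers bytes 9..16 of buf */

      __atomic_store_n(p, (uint64_t)-1, __ATOMIC_RELEASE);
      return 0;
  }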

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
---
 arch/arm64/Kconfig              | 10 ++++++++++
 arch/arm64/include/asm/sysreg.h |  1 +
 arch/arm64/kernel/cpufeature.c  | 18 ++++++++++++++++++
 arch/arm64/tools/cpucaps        |  1 +
 4 files changed, 30 insertions(+)

Comments

Mark Rutland Sept. 6, 2024, 9:44 a.m. UTC | #1
On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
> When FEAT_LSE2 is implemented and Bit 6 of sctlr_elx is nAA, the
> full name of the Not-aligned access. nAA bit has two values:
> 0b0 Unaligned accesses by the specified instructions generate an
> Alignment fault.
> 0b1 Unaligned accesses by the specified instructions do not generate
> an Alignment fault.
> 
> this patch sets the nAA bit to 1,The following instructions will not
> generate an Alignment fault if all bytes being accessed are not within
> a single 16-byte quantity:
> • LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH,LDLAR,
> LDLARH.
> • STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
> 
> Signed-off-by: Tian Tao <tiantao6@hisilicon.com>

What is going to depend on this? Nothing in the kernel depends on being
able to make unaligned accesses with these instructions, and (since you
haven't added a HWCAP), userspace has no idea that these accesses won't
generate an alignment fault.
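
For reference, userspace feature detection on arm64 normally goes through
getauxval(); a rough sketch follows. HWCAP_USCAT is the existing hwcap for
FEAT_LSE2's unaligned-within-16-bytes atomicity, and nothing advertises the
nAA behaviour that this patch enables:

  /* Sketch of the usual userspace feature check. HWCAP_USCAT advertises
   * FEAT_LSE2 (atomic unaligned accesses within an aligned 16-byte
   * quantity); no hwcap describes the state of SCTLR_ELx.nAA. */
  #include <stdio.h>
  #include <sys/auxv.h>
  #include <asm/hwcap.h>    /* HWCAP_USCAT, from the arm64 uapi headers */

  int main(void)
  {
      unsigned long hwcap = getauxval(AT_HWCAP);

      if (hwcap & HWCAP_USCAT)
          printf("FEAT_LSE2 atomicity advertised\n");
      else
          printf("no FEAT_LSE2 hwcap\n");
      return 0;
  }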

Mark.

> ---
>  arch/arm64/Kconfig              | 10 ++++++++++
>  arch/arm64/include/asm/sysreg.h |  1 +
>  arch/arm64/kernel/cpufeature.c  | 18 ++++++++++++++++++
>  arch/arm64/tools/cpucaps        |  1 +
>  4 files changed, 30 insertions(+)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 77d7ef0b16c2..7afe73ebcd79 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -2023,6 +2023,16 @@ config ARM64_TLB_RANGE
>  	  The feature introduces new assembly instructions, and they were
>  	  support when binutils >= 2.30.
>  
> +config ARM64_LSE2_NAA
> +	bool "Enable support for not-aligned access"
> +	depends on AS_HAS_ARMV8_4
> +	help
> +	 LSE2 is an extension to the original LSE (Large System Extensions) feature,
> +	 introduced in ARMv8.4.
> +
> +	 Enable this feature will not generate an Alignment fault if all bytes being
> +	 accessed are not within a single 16-byte quantity.
> +
>  endmenu # "ARMv8.4 architectural features"
>  
>  menu "ARMv8.5 architectural features"
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 8cced8aa75a9..42e3a1959aa8 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -854,6 +854,7 @@
>  #define SCTLR_ELx_ENDB	 (BIT(13))
>  #define SCTLR_ELx_I	 (BIT(12))
>  #define SCTLR_ELx_EOS	 (BIT(11))
> +#define SCTLR_ELx_nAA    (BIT(6))
>  #define SCTLR_ELx_SA	 (BIT(3))
>  #define SCTLR_ELx_C	 (BIT(2))
>  #define SCTLR_ELx_A	 (BIT(1))
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 646ecd3069fd..558869a7c7f0 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2299,6 +2299,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>  }
>  #endif /* CONFIG_ARM64_MTE */
>  
> +#ifdef CONFIG_ARM64_LSE2_NAA
> +static void cpu_enable_lse2(const struct arm64_cpu_capabilities *__unused)
> +{
> +	sysreg_clear_set(sctlr_el2, SCTLR_ELx_nAA, SCTLR_ELx_nAA);
> +	isb();
> +}
> +#endif
> +
>  static void user_feature_fixup(void)
>  {
>  	if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
> @@ -2427,6 +2435,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
>  	},
>  #endif /* CONFIG_ARM64_LSE_ATOMICS */
> +#ifdef CONFIG_ARM64_LSE2_NAA
> +	{
> +		.desc = "Support for not-aligned access",
> +		.capability = ARM64_HAS_LSE2,
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.matches = has_cpuid_feature,
> +		.cpu_enable = cpu_enable_lse2,
> +		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, AT, IMP)
> +	},
> +#endif
>  	{
>  		.desc = "Virtualization Host Extensions",
>  		.capability = ARM64_HAS_VIRT_HOST_EXTN,
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index ac3429d892b9..0c7c0a293574 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -41,6 +41,7 @@ HAS_HCX
>  HAS_LDAPR
>  HAS_LPA2
>  HAS_LSE_ATOMICS
> +HAS_LSE2
>  HAS_MOPS
>  HAS_NESTED_VIRT
>  HAS_PAN
> -- 
> 2.33.0
> 
>
tiantao (H) Sept. 6, 2024, 10:58 a.m. UTC | #2
On 2024/9/6 17:44, Mark Rutland wrote:
> On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
>> When FEAT_LSE2 is implemented and Bit 6 of sctlr_elx is nAA, the
>> full name of the Not-aligned access. nAA bit has two values:
>> 0b0 Unaligned accesses by the specified instructions generate an
>> Alignment fault.
>> 0b1 Unaligned accesses by the specified instructions do not generate
>> an Alignment fault.
>>
>> this patch sets the nAA bit to 1,The following instructions will not
>> generate an Alignment fault if all bytes being accessed are not within
>> a single 16-byte quantity:
>> • LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH,LDLAR,
>> LDLARH.
>> • STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
>>
>> Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
> What is going to depend on this? Nothing in the kernel depends on being
> able to make unaligned accesses with these instructions, and (since you
> haven't added a HWCAP), userspace has no idea that these accesses won't
> generate an alignment fault.
>
> Mark.

I've come across a situation where the simplified code is as follows:

  long address = (long) mmap(NULL, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  long new_address = address + 9;
  long *p = (long *) new_address;
  long v = -1;

  __atomic_store(p, &v, __ATOMIC_RELEASE);

A coredump occurs after executing __atomic_store, but the user code can't
be changed, so I'm trying to enable nAA to solve this problem.

>> ---
>>   arch/arm64/Kconfig              | 10 ++++++++++
>>   arch/arm64/include/asm/sysreg.h |  1 +
>>   arch/arm64/kernel/cpufeature.c  | 18 ++++++++++++++++++
>>   arch/arm64/tools/cpucaps        |  1 +
>>   4 files changed, 30 insertions(+)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 77d7ef0b16c2..7afe73ebcd79 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -2023,6 +2023,16 @@ config ARM64_TLB_RANGE
>>   	  The feature introduces new assembly instructions, and they were
>>   	  support when binutils >= 2.30.
>>   
>> +config ARM64_LSE2_NAA
>> +	bool "Enable support for not-aligned access"
>> +	depends on AS_HAS_ARMV8_4
>> +	help
>> +	 LSE2 is an extension to the original LSE (Large System Extensions) feature,
>> +	 introduced in ARMv8.4.
>> +
>> +	 Enable this feature will not generate an Alignment fault if all bytes being
>> +	 accessed are not within a single 16-byte quantity.
>> +
>>   endmenu # "ARMv8.4 architectural features"
>>   
>>   menu "ARMv8.5 architectural features"
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index 8cced8aa75a9..42e3a1959aa8 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -854,6 +854,7 @@
>>   #define SCTLR_ELx_ENDB	 (BIT(13))
>>   #define SCTLR_ELx_I	 (BIT(12))
>>   #define SCTLR_ELx_EOS	 (BIT(11))
>> +#define SCTLR_ELx_nAA    (BIT(6))
>>   #define SCTLR_ELx_SA	 (BIT(3))
>>   #define SCTLR_ELx_C	 (BIT(2))
>>   #define SCTLR_ELx_A	 (BIT(1))
>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index 646ecd3069fd..558869a7c7f0 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -2299,6 +2299,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>>   }
>>   #endif /* CONFIG_ARM64_MTE */
>>   
>> +#ifdef CONFIG_ARM64_LSE2_NAA
>> +static void cpu_enable_lse2(const struct arm64_cpu_capabilities *__unused)
>> +{
>> +	sysreg_clear_set(sctlr_el2, SCTLR_ELx_nAA, SCTLR_ELx_nAA);
>> +	isb();
>> +}
>> +#endif
>> +
>>   static void user_feature_fixup(void)
>>   {
>>   	if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
>> @@ -2427,6 +2435,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>>   		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
>>   	},
>>   #endif /* CONFIG_ARM64_LSE_ATOMICS */
>> +#ifdef CONFIG_ARM64_LSE2_NAA
>> +	{
>> +		.desc = "Support for not-aligned access",
>> +		.capability = ARM64_HAS_LSE2,
>> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
>> +		.matches = has_cpuid_feature,
>> +		.cpu_enable = cpu_enable_lse2,
>> +		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, AT, IMP)
>> +	},
>> +#endif
>>   	{
>>   		.desc = "Virtualization Host Extensions",
>>   		.capability = ARM64_HAS_VIRT_HOST_EXTN,
>> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
>> index ac3429d892b9..0c7c0a293574 100644
>> --- a/arch/arm64/tools/cpucaps
>> +++ b/arch/arm64/tools/cpucaps
>> @@ -41,6 +41,7 @@ HAS_HCX
>>   HAS_LDAPR
>>   HAS_LPA2
>>   HAS_LSE_ATOMICS
>> +HAS_LSE2
>>   HAS_MOPS
>>   HAS_NESTED_VIRT
>>   HAS_PAN
>> -- 
>> 2.33.0
>>
>>
> .
>
Mark Rutland Sept. 6, 2024, 11:09 a.m. UTC | #3
On Fri, Sep 06, 2024 at 06:58:19PM +0800, tiantao (H) wrote:
> 
> On 2024/9/6 17:44, Mark Rutland wrote:
> > On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
> > > When FEAT_LSE2 is implemented and Bit 6 of sctlr_elx is nAA, the
> > > full name of the Not-aligned access. nAA bit has two values:
> > > 0b0 Unaligned accesses by the specified instructions generate an
> > > Alignment fault.
> > > 0b1 Unaligned accesses by the specified instructions do not generate
> > > an Alignment fault.
> > > 
> > > this patch sets the nAA bit to 1,The following instructions will not
> > > generate an Alignment fault if all bytes being accessed are not within
> > > a single 16-byte quantity:
> > > • LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH,LDLAR,
> > > LDLARH.
> > > • STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
> > > 
> > > Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
> > What is going to depend on this? Nothing in the kernel depends on being
> > able to make unaligned accesses with these instructions, and (since you
> > haven't added a HWCAP), userspace has no idea that these accesses won't
> > generate an alignment fault.
> > 
> > Mark.
> 
> I've come across a situation where the simplified code is as follows:
> 
>  long  address = (long) mmap(NULL,1024*1024*2,PROT_READ|PROT_WRITE,
> MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
> 
> long new_address = address + 9;
> 
>  long *p = (long*) new_address;
>  long v = -1;
> 
>  __atomic_store(p, &v, __ATOMIC_RELEASE);
> 
> coredump occurs after executing __atomic_store, but the user code can't be
> changed,

Where is that code and why can't it be changed? That code has never
worked and would always have generated a coredump, and even with this
patch it cannot work on hardware without LSE2.

> so I'm trying to enable NAA to solve this problem.

AFAICT that's making a kernel change to paper over a userspace bug.
As-is I don't think that's a good justification for changing nAA.

Mark.

> 
> > > ---
> > >   arch/arm64/Kconfig              | 10 ++++++++++
> > >   arch/arm64/include/asm/sysreg.h |  1 +
> > >   arch/arm64/kernel/cpufeature.c  | 18 ++++++++++++++++++
> > >   arch/arm64/tools/cpucaps        |  1 +
> > >   4 files changed, 30 insertions(+)
> > > 
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > index 77d7ef0b16c2..7afe73ebcd79 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -2023,6 +2023,16 @@ config ARM64_TLB_RANGE
> > >   	  The feature introduces new assembly instructions, and they were
> > >   	  support when binutils >= 2.30.
> > > +config ARM64_LSE2_NAA
> > > +	bool "Enable support for not-aligned access"
> > > +	depends on AS_HAS_ARMV8_4
> > > +	help
> > > +	 LSE2 is an extension to the original LSE (Large System Extensions) feature,
> > > +	 introduced in ARMv8.4.
> > > +
> > > +	 Enable this feature will not generate an Alignment fault if all bytes being
> > > +	 accessed are not within a single 16-byte quantity.
> > > +
> > >   endmenu # "ARMv8.4 architectural features"
> > >   menu "ARMv8.5 architectural features"
> > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > > index 8cced8aa75a9..42e3a1959aa8 100644
> > > --- a/arch/arm64/include/asm/sysreg.h
> > > +++ b/arch/arm64/include/asm/sysreg.h
> > > @@ -854,6 +854,7 @@
> > >   #define SCTLR_ELx_ENDB	 (BIT(13))
> > >   #define SCTLR_ELx_I	 (BIT(12))
> > >   #define SCTLR_ELx_EOS	 (BIT(11))
> > > +#define SCTLR_ELx_nAA    (BIT(6))
> > >   #define SCTLR_ELx_SA	 (BIT(3))
> > >   #define SCTLR_ELx_C	 (BIT(2))
> > >   #define SCTLR_ELx_A	 (BIT(1))
> > > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> > > index 646ecd3069fd..558869a7c7f0 100644
> > > --- a/arch/arm64/kernel/cpufeature.c
> > > +++ b/arch/arm64/kernel/cpufeature.c
> > > @@ -2299,6 +2299,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
> > >   }
> > >   #endif /* CONFIG_ARM64_MTE */
> > > +#ifdef CONFIG_ARM64_LSE2_NAA
> > > +static void cpu_enable_lse2(const struct arm64_cpu_capabilities *__unused)
> > > +{
> > > +	sysreg_clear_set(sctlr_el2, SCTLR_ELx_nAA, SCTLR_ELx_nAA);
> > > +	isb();
> > > +}
> > > +#endif
> > > +
> > >   static void user_feature_fixup(void)
> > >   {
> > >   	if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
> > > @@ -2427,6 +2435,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> > >   		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
> > >   	},
> > >   #endif /* CONFIG_ARM64_LSE_ATOMICS */
> > > +#ifdef CONFIG_ARM64_LSE2_NAA
> > > +	{
> > > +		.desc = "Support for not-aligned access",
> > > +		.capability = ARM64_HAS_LSE2,
> > > +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> > > +		.matches = has_cpuid_feature,
> > > +		.cpu_enable = cpu_enable_lse2,
> > > +		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, AT, IMP)
> > > +	},
> > > +#endif
> > >   	{
> > >   		.desc = "Virtualization Host Extensions",
> > >   		.capability = ARM64_HAS_VIRT_HOST_EXTN,
> > > diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> > > index ac3429d892b9..0c7c0a293574 100644
> > > --- a/arch/arm64/tools/cpucaps
> > > +++ b/arch/arm64/tools/cpucaps
> > > @@ -41,6 +41,7 @@ HAS_HCX
> > >   HAS_LDAPR
> > >   HAS_LPA2
> > >   HAS_LSE_ATOMICS
> > > +HAS_LSE2
> > >   HAS_MOPS
> > >   HAS_NESTED_VIRT
> > >   HAS_PAN
> > > -- 
> > > 2.33.0
> > > 
> > > 
> > .
> >
tiantao (H) Sept. 6, 2024, 11:18 a.m. UTC | #4
On 2024/9/6 19:09, Mark Rutland wrote:
> On Fri, Sep 06, 2024 at 06:58:19PM +0800, tiantao (H) wrote:
>> On 2024/9/6 17:44, Mark Rutland wrote:
>>> On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
>>>> When FEAT_LSE2 is implemented and Bit 6 of sctlr_elx is nAA, the
>>>> full name of the Not-aligned access. nAA bit has two values:
>>>> 0b0 Unaligned accesses by the specified instructions generate an
>>>> Alignment fault.
>>>> 0b1 Unaligned accesses by the specified instructions do not generate
>>>> an Alignment fault.
>>>>
>>>> this patch sets the nAA bit to 1,The following instructions will not
>>>> generate an Alignment fault if all bytes being accessed are not within
>>>> a single 16-byte quantity:
>>>> • LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH,LDLAR,
>>>> LDLARH.
>>>> • STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
>>>>
>>>> Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
>>> What is going to depend on this? Nothing in the kernel depends on being
>>> able to make unaligned accesses with these instructions, and (since you
>>> haven't added a HWCAP), userspace has no idea that these accesses won't
>>> generate an alignment fault.
>>>
>>> Mark.
>> I've come across a situation where the simplified code is as follows:
>>
>>   long  address = (long) mmap(NULL,1024*1024*2,PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
>>
>> long new_address = address + 9;
>>
>>   long *p = (long*) new_address;
>>   long v = -1;
>>
>>   __atomic_store(p, &v, __ATOMIC_RELEASE);
>>
>> coredump occurs after executing __atomic_store, but the user code can't be
>> changed,
> Where is that code and why can't it be changed? That code has never
> worked and would always have generated a coredump, and even with this
> patch it cannot work on hardware without LSE2.
>   
This code works fine on x86 platforms and does not coredump.
>> so I'm trying to enable NAA to solve this problem.
> AFAICT that's making a kernel change to paper over a userspace bug.
> As-is I don't think that's a good justification for changing nAA.
>
> Mark.
ARMv8.4 provides nAA as a feature; shouldn't it be enabled in the kernel?
>>>> ---
>>>>    arch/arm64/Kconfig              | 10 ++++++++++
>>>>    arch/arm64/include/asm/sysreg.h |  1 +
>>>>    arch/arm64/kernel/cpufeature.c  | 18 ++++++++++++++++++
>>>>    arch/arm64/tools/cpucaps        |  1 +
>>>>    4 files changed, 30 insertions(+)
>>>>
>>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>>> index 77d7ef0b16c2..7afe73ebcd79 100644
>>>> --- a/arch/arm64/Kconfig
>>>> +++ b/arch/arm64/Kconfig
>>>> @@ -2023,6 +2023,16 @@ config ARM64_TLB_RANGE
>>>>    	  The feature introduces new assembly instructions, and they were
>>>>    	  support when binutils >= 2.30.
>>>> +config ARM64_LSE2_NAA
>>>> +	bool "Enable support for not-aligned access"
>>>> +	depends on AS_HAS_ARMV8_4
>>>> +	help
>>>> +	 LSE2 is an extension to the original LSE (Large System Extensions) feature,
>>>> +	 introduced in ARMv8.4.
>>>> +
>>>> +	 Enable this feature will not generate an Alignment fault if all bytes being
>>>> +	 accessed are not within a single 16-byte quantity.
>>>> +
>>>>    endmenu # "ARMv8.4 architectural features"
>>>>    menu "ARMv8.5 architectural features"
>>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>>> index 8cced8aa75a9..42e3a1959aa8 100644
>>>> --- a/arch/arm64/include/asm/sysreg.h
>>>> +++ b/arch/arm64/include/asm/sysreg.h
>>>> @@ -854,6 +854,7 @@
>>>>    #define SCTLR_ELx_ENDB	 (BIT(13))
>>>>    #define SCTLR_ELx_I	 (BIT(12))
>>>>    #define SCTLR_ELx_EOS	 (BIT(11))
>>>> +#define SCTLR_ELx_nAA    (BIT(6))
>>>>    #define SCTLR_ELx_SA	 (BIT(3))
>>>>    #define SCTLR_ELx_C	 (BIT(2))
>>>>    #define SCTLR_ELx_A	 (BIT(1))
>>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>>> index 646ecd3069fd..558869a7c7f0 100644
>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>> @@ -2299,6 +2299,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>>>>    }
>>>>    #endif /* CONFIG_ARM64_MTE */
>>>> +#ifdef CONFIG_ARM64_LSE2_NAA
>>>> +static void cpu_enable_lse2(const struct arm64_cpu_capabilities *__unused)
>>>> +{
>>>> +	sysreg_clear_set(sctlr_el2, SCTLR_ELx_nAA, SCTLR_ELx_nAA);
>>>> +	isb();
>>>> +}
>>>> +#endif
>>>> +
>>>>    static void user_feature_fixup(void)
>>>>    {
>>>>    	if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
>>>> @@ -2427,6 +2435,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>>>>    		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
>>>>    	},
>>>>    #endif /* CONFIG_ARM64_LSE_ATOMICS */
>>>> +#ifdef CONFIG_ARM64_LSE2_NAA
>>>> +	{
>>>> +		.desc = "Support for not-aligned access",
>>>> +		.capability = ARM64_HAS_LSE2,
>>>> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
>>>> +		.matches = has_cpuid_feature,
>>>> +		.cpu_enable = cpu_enable_lse2,
>>>> +		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, AT, IMP)
>>>> +	},
>>>> +#endif
>>>>    	{
>>>>    		.desc = "Virtualization Host Extensions",
>>>>    		.capability = ARM64_HAS_VIRT_HOST_EXTN,
>>>> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
>>>> index ac3429d892b9..0c7c0a293574 100644
>>>> --- a/arch/arm64/tools/cpucaps
>>>> +++ b/arch/arm64/tools/cpucaps
>>>> @@ -41,6 +41,7 @@ HAS_HCX
>>>>    HAS_LDAPR
>>>>    HAS_LPA2
>>>>    HAS_LSE_ATOMICS
>>>> +HAS_LSE2
>>>>    HAS_MOPS
>>>>    HAS_NESTED_VIRT
>>>>    HAS_PAN
>>>> -- 
>>>> 2.33.0
>>>>
>>>>
>>> .
>>>
> .
>
Mark Rutland Sept. 6, 2024, 11:42 a.m. UTC | #5
On Fri, Sep 06, 2024 at 06:58:19PM +0800, tiantao (H) wrote:
> 
> On 2024/9/6 17:44, Mark Rutland wrote:
> > On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
> > > When FEAT_LSE2 is implemented and Bit 6 of sctlr_elx is nAA, the
> > > full name of the Not-aligned access. nAA bit has two values:
> > > 0b0 Unaligned accesses by the specified instructions generate an
> > > Alignment fault.
> > > 0b1 Unaligned accesses by the specified instructions do not generate
> > > an Alignment fault.
> > > 
> > > this patch sets the nAA bit to 1,The following instructions will not
> > > generate an Alignment fault if all bytes being accessed are not within
> > > a single 16-byte quantity:
> > > • LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH,LDLAR,
> > > LDLARH.
> > > • STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
> > > 
> > > Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
> > What is going to depend on this? Nothing in the kernel depends on being
> > able to make unaligned accesses with these instructions, and (since you
> > haven't added a HWCAP), userspace has no idea that these accesses won't
> > generate an alignment fault.
> > 
> > Mark.
> 
> I've come across a situation where the simplified code is as follows:
> 
>  long  address = (long) mmap(NULL,1024*1024*2,PROT_READ|PROT_WRITE,
> MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
> 
> long new_address = address + 9;
> 
>  long *p = (long*) new_address;
>  long v = -1;
> 
>  __atomic_store(p, &v, __ATOMIC_RELEASE);

Hold on; looking at the ARM ARM (ARM DDI 0487K.a), the example and the
patch are both bogus. NAK to this patch, explanation below.

Per section B2.2.1.1 "Changes to single-copy atomicity in Armv8.4", all of the
LSE2 relaxations on alignment require:

| all bytes being accessed are within a 16-byte quantity that is aligned
| to 16 bytes

In your example you perform an 8-byte access at an offset of 9 bytes,
which means the access is *not* contained within 16 bytes, and is *not*
guaranteed to be atomic. That code simply has to be fixed, the kernel
cannot magically make it safe.

Regardless of that, the nAA bit *only* causes an alignment fault for
accesses which cannot be atomic. If a CPU has LSE2 and SCTLR_ELx.nAA=0,
an unaligned access within 16 bytes (which would be atomic) does not
cause an alignment fault. That's pretty clear from the description of
nAA and the AArch64.UnalignedAccessFaults() pseudocode:

| boolean AArch64.UnalignedAccessFaults(AccessDescriptor accdesc, bits(64) address, integer size)
|     if AlignmentEnforced() then
|         return TRUE;
|     elsif accdesc.acctype == AccessType_GCS then
|         return TRUE;
|     elsif accdesc.rcw then
|         return TRUE;
|     elsif accdesc.ls64 then
|         return TRUE;
|     elsif accdesc.exclusive || accdesc.atomicop then
|         return !IsFeatureImplemented(FEAT_LSE2) || !AllInAlignedQuantity(address, size, 16);
|     elsif accdesc.acqsc || accdesc.acqpc || accdesc.relsc then
|         return (!IsFeatureImplemented(FEAT_LSE2) ||
|                 (SCTLR_ELx[].nAA == '0' && !AllInAlignedQuantity(address, size, 16)));
|     else
|         return FALSE;

Note that when FEAT_LSE2 is implemented, unaligned atomic accesses within
a 16-byte window never raise an alignment fault, regardless of the value
of the nAA bit.
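
To make the 16-byte rule concrete, here is a rough C rendering of the
AllInAlignedQuantity() check used above (a sketch, not the architected
pseudocode): an 8-byte access at offset 9 from a 16-byte-aligned base covers
bytes 9..16 and so crosses into the next 16-byte quantity, while the same
access at offset 4 stays within one.

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  /* True when all accessed bytes fall inside one aligned 16-byte quantity. */
  static bool all_in_aligned_16(uintptr_t address, size_t size)
  {
      return (address & ~(uintptr_t)15) ==
             ((address + size - 1) & ~(uintptr_t)15);
  }

  int main(void)
  {
      printf("offset 9, 8 bytes: %d\n", all_in_aligned_16(9, 8)); /* 0: crosses */
      printf("offset 4, 8 bytes: %d\n", all_in_aligned_16(4, 8)); /* 1: contained */
      return 0;
  }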

Setting the nAA bit only hides where atomicity is lost, and does not
make code function correctly. Consequently, that only serves to hide
bugs, making those harder to detect, debug, and fix. Thus I see no
reason to set the nAA bit.

So as above, NAK to this patch. Please fix your broken userspace code.

Mark.
tiantao (H) Sept. 6, 2024, 12:20 p.m. UTC | #6
On 2024/9/6 19:42, Mark Rutland wrote:
> On Fri, Sep 06, 2024 at 06:58:19PM +0800, tiantao (H) wrote:
>> On 2024/9/6 17:44, Mark Rutland wrote:
>>> On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
>>>> When FEAT_LSE2 is implemented and Bit 6 of sctlr_elx is nAA, the
>>>> full name of the Not-aligned access. nAA bit has two values:
>>>> 0b0 Unaligned accesses by the specified instructions generate an
>>>> Alignment fault.
>>>> 0b1 Unaligned accesses by the specified instructions do not generate
>>>> an Alignment fault.
>>>>
>>>> this patch sets the nAA bit to 1,The following instructions will not
>>>> generate an Alignment fault if all bytes being accessed are not within
>>>> a single 16-byte quantity:
>>>> • LDAPR, LDAPRH, LDAPUR, LDAPURH, LDAPURSH, LDAPURSW, LDAR, LDARH,LDLAR,
>>>> LDLARH.
>>>> • STLLR, STLLRH, STLR, STLRH, STLUR, and STLURH
>>>>
>>>> Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
>>> What is going to depend on this? Nothing in the kernel depends on being
>>> able to make unaligned accesses with these instructions, and (since you
>>> haven't added a HWCAP), userspace has no idea that these accesses won't
>>> generate an alignment fault.
>>>
>>> Mark.
>> I've come across a situation where the simplified code is as follows:
>>
>>   long  address = (long) mmap(NULL,1024*1024*2,PROT_READ|PROT_WRITE,
>> MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
>>
>> long new_address = address + 9;
>>
>>   long *p = (long*) new_address;
>>   long v = -1;
>>
>>   __atomic_store(p, &v, __ATOMIC_RELEASE);
> Hold on; looking at the ARM ARM (ARM DDI 0487K.a), the example and the
> patch are both bogus. NAK to this patch, explanation below.
>
> Per section B2.2.1.1 "Changes to single-copy atomicity in Armv8.4", all of the
> LSE2 relaxations on alignment require:
>
> | all bytes being accessed are within a 16-byte quantity that is aligned
> | to 16 bytes
>
> In your example you perform an 8-byte access at an offset of 9 bytes,
> which means the access is *not* contained within 16 bytes, and is *not*
> guaranteed to be atomic. That code simply has to be fixed, the kernel
> cannot magically make it safe.
>
> Regardless of that, the nAA bit *only* causes an alignment fault for
> accesses which cannot be atomic. If a CPU has LSE2 and SCTLR_ELx.nAA=0,
> an unaligned access within 16 bytes (which would be atomic) does not
> cause an alignment fault. That's pretty clear from the description of
> nAA and the AArch64.UnalignedAccessFaults() pseudocode:

Sorry, this example is just for verifying nAA; it's not an example of a
real scenario.

We have scenarios that don't require atomicity to be guaranteed; they
just require that a coredump doesn't occur on a non-aligned access.

> | boolean AArch64.UnalignedAccessFaults(AccessDescriptor accdesc, bits(64) address, integer size)
> |     if AlignmentEnforced() then
> |         return TRUE;
> |     elsif accdesc.acctype == AccessType_GCS then
> |         return TRUE;
> |     elsif accdesc.rcw then
> |         return TRUE;
> |     elsif accdesc.ls64 then
> |         return TRUE;
> |     elsif accdesc.exclusive || accdesc.atomicop then
> |         return !IsFeatureImplemented(FEAT_LSE2) || !AllInAlignedQuantity(address, size, 16);
> |     elsif accdesc.acqsc || accdesc.acqpc || accdesc.relsc then
> |         return (!IsFeatureImplemented(FEAT_LSE2) ||
> |                 (SCTLR_ELx[].nAA == '0' && !AllInAlignedQuantity(address, size, 16)));
> |     else
> |         return FALSE;
>
> Note that when FEAT_LSE2 is implemented, unaligned atomic accesss within
> a 16-byte window never raise an alignment fault, regardless of the value
> of the nAA bit.

> Setting the nAA bit only hides where atomicity is lost, and does not
> make code function correctly. Consequently, that only serves to hide
> bugs, making those harder to detect, debug, and fix. Thus I see no
> reason to set the nAA bit.
>
> So as above, NAK to this path. Please fir your broken userspace code.
>
> Mark.
>
> .
Mark Rutland Sept. 6, 2024, 1:05 p.m. UTC | #7
On Fri, Sep 06, 2024 at 08:20:19PM +0800, tiantao (H) wrote:
> 
> On 2024/9/6 19:42, Mark Rutland wrote:
> > On Fri, Sep 06, 2024 at 06:58:19PM +0800, tiantao (H) wrote:
>>> On 2024/9/6 17:44, Mark Rutland wrote:
> > > > On Fri, Sep 06, 2024 at 05:08:12PM +0800, Tian Tao wrote:
> > > I've come across a situation where the simplified code is as follows:
> > > 
> > >   long  address = (long) mmap(NULL,1024*1024*2,PROT_READ|PROT_WRITE,
> > > MAP_PRIVATE|MAP_ANONYMOUS,-1,0);
> > > 
> > > long new_address = address + 9;
> > > 
> > >   long *p = (long*) new_address;
> > >   long v = -1;
> > > 
> > >   __atomic_store(p, &v, __ATOMIC_RELEASE);
> > Hold on; looking at the ARM ARM (ARM DDI 0487K.a), the example and the
> > patch are both bogus. NAK to this patch, explanation below.
> > 
> > Per section B2.2.1.1 "Changes to single-copy atomicity in Armv8.4", all of the
> > LSE2 relaxations on alignment require:
> > 
> > | all bytes being accessed are within a 16-byte quantity that is aligned
> > | to 16 bytes
> > 
> > In your example you perform an 8-byte access at an offset of 9 bytes,
> > which means the access is *not* contained within 16 bytes, and is *not*
> > guaranteed to be atomic. That code simply has to be fixed, the kernel
> > cannot magically make it safe.
> > 
> > Regardless of that, the nAA bit *only* causes an alignment fault for
> > accesses which cannot be atomic. If a CPU has LSE2 and SCTLR_ELx.nAA=0,
> > an unaligned access within 16 bytes (which would be atomic) does not
> > cause an alignment fault. That's pretty clear from the description of
> > nAA and the AArch64.UnalignedAccessFaults() pseudocode:
> 
> Sorry, this example is just for verifying nnA, it's not an example of a real
> scenario,
> 
> we have scenarios that don't require atomicity to be guaranteed, they just
> require that coredump doesn't occur when non-aligned

Please give a concrete explanation of such a scenario, with an
explanation of why atomicity is not required.

Mark.

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 77d7ef0b16c2..7afe73ebcd79 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2023,6 +2023,16 @@  config ARM64_TLB_RANGE
 	  The feature introduces new assembly instructions, and they were
 	  support when binutils >= 2.30.
 
+config ARM64_LSE2_NAA
+	bool "Enable support for not-aligned access"
+	depends on AS_HAS_ARMV8_4
+	help
+	 LSE2 is an extension to the original LSE (Large System Extensions) feature,
+	 introduced in ARMv8.4.
+
+	 Enabling this option means the affected instructions will not generate an
+	 Alignment fault when the accessed bytes are not all within one 16-byte quantity.
+
 endmenu # "ARMv8.4 architectural features"
 
 menu "ARMv8.5 architectural features"
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8cced8aa75a9..42e3a1959aa8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -854,6 +854,7 @@ 
 #define SCTLR_ELx_ENDB	 (BIT(13))
 #define SCTLR_ELx_I	 (BIT(12))
 #define SCTLR_ELx_EOS	 (BIT(11))
+#define SCTLR_ELx_nAA    (BIT(6))
 #define SCTLR_ELx_SA	 (BIT(3))
 #define SCTLR_ELx_C	 (BIT(2))
 #define SCTLR_ELx_A	 (BIT(1))
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646ecd3069fd..558869a7c7f0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2299,6 +2299,14 @@  static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 }
 #endif /* CONFIG_ARM64_MTE */
 
+#ifdef CONFIG_ARM64_LSE2_NAA
+static void cpu_enable_lse2(const struct arm64_cpu_capabilities *__unused)
+{
+	sysreg_clear_set(sctlr_el2, SCTLR_ELx_nAA, SCTLR_ELx_nAA);
+	isb();
+}
+#endif
+
 static void user_feature_fixup(void)
 {
 	if (cpus_have_cap(ARM64_WORKAROUND_2658417)) {
@@ -2427,6 +2435,16 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
 	},
 #endif /* CONFIG_ARM64_LSE_ATOMICS */
+#ifdef CONFIG_ARM64_LSE2_NAA
+	{
+		.desc = "Support for not-aligned access",
+		.capability = ARM64_HAS_LSE2,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_enable_lse2,
+		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, AT, IMP)
+	},
+#endif
 	{
 		.desc = "Virtualization Host Extensions",
 		.capability = ARM64_HAS_VIRT_HOST_EXTN,
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index ac3429d892b9..0c7c0a293574 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -41,6 +41,7 @@  HAS_HCX
 HAS_LDAPR
 HAS_LPA2
 HAS_LSE_ATOMICS
+HAS_LSE2
 HAS_MOPS
 HAS_NESTED_VIRT
 HAS_PAN
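
As submitted, nothing in the kernel consumes ARM64_HAS_LSE2, which is the gap
raised in review. For illustration only, a hypothetical in-kernel consumer
would typically gate on the capability like this:

  /* Hypothetical consumer, not part of the patch: gate an LSE2-only
   * code path on the system-wide capability once all CPUs have been
   * enumerated. */
  #include <linux/kconfig.h>
  #include <asm/cpufeature.h>

  static inline bool lse2_naa_usable(void)
  {
      return IS_ENABLED(CONFIG_ARM64_LSE2_NAA) &&
             cpus_have_final_cap(ARM64_HAS_LSE2);
  }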