[v2,2/6] arm64: HWCAP: add support for AT_HWCAP2

Message ID 1550751657-30252-3-git-send-email-andrew.murray@arm.com (mailing list archive)
State New, archived
Series Initial support for CVADP

Commit Message

Andrew Murray Feb. 21, 2019, 12:20 p.m. UTC
As we will exhaust the first 32 bits of AT_HWCAP let's start
exposing AT_HWCAP2 to userspace to give us up to 64 caps.

Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we
prefer to expand into AT_HWCAP2 in order to provide a consistent
view to userspace between ILP32 and LP64. However internal to the
kernel we prefer to continue to use the full space of elf_hwcap.

To reduce complexity and allow for future expansion, we now
represent hwcaps in the kernel as ordinals and use a
KERNEL_HWCAP_ prefix. This allows us to support automatic feature
based module loading for all our hwcaps.
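
For example, a module that should auto-load when a given hwcap is
present can keep using the generic module_cpu_feature_match() helper,
and with ordinal hwcaps this now also works for caps above bit 31
(illustrative sketch only):

	/* auto-load this module when the SHA2 hwcap is present */
	module_cpu_feature_match(SHA2, sha2_ce_mod_init);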

We introduce cpu_set_feature to set hwcaps which complements the
existing cpu_have_feature helper. These helpers allow us to clean
up existing direct uses of elf_hwcap and reduce any future effort
required to move beyond 64 caps.

For convenience we also introduce cpu_{have,set}_feature_name which
makes use of the cpu_feature macro to allow providing a hwcap name
without a {KERNEL_}HWCAP_ prefix.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
---
 arch/arm64/crypto/aes-ce-ccm-glue.c      |  2 +-
 arch/arm64/crypto/aes-neonbs-glue.c      |  2 +-
 arch/arm64/crypto/chacha-neon-glue.c     |  2 +-
 arch/arm64/crypto/crct10dif-ce-glue.c    |  2 +-
 arch/arm64/crypto/ghash-ce-glue.c        |  6 +--
 arch/arm64/crypto/nhpoly1305-neon-glue.c |  2 +-
 arch/arm64/crypto/sha256-glue.c          |  4 +-
 arch/arm64/include/asm/cpufeature.h      | 19 ++++++---
 arch/arm64/include/asm/hwcap.h           | 40 ++++++++++++++++++-
 arch/arm64/include/uapi/asm/hwcap.h      |  2 +-
 arch/arm64/kernel/cpufeature.c           | 66 ++++++++++++++++----------------
 arch/arm64/kernel/cpuinfo.c              |  2 +-
 arch/arm64/kernel/fpsimd.c               |  4 +-
 drivers/clocksource/arm_arch_timer.c     |  8 ++++
 14 files changed, 108 insertions(+), 53 deletions(-)

Comments

Dave Martin Feb. 21, 2019, 6:45 p.m. UTC | #1
On Thu, Feb 21, 2019 at 12:20:53PM +0000, Andrew Murray wrote:
> As we will exhaust the first 32 bits of AT_HWCAP let's start
> exposing AT_HWCAP2 to userspace to give us up to 64 caps.
>
> Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we
> prefer to expand into AT_HWCAP2 in order to provide a consistent
> view to userspace between ILP32 and LP64. However internal to the
> kernel we prefer to continue to use the full space of elf_hwcap.
>
> To reduce complexity and allow for future expansion, we now
> represent hwcaps in the kernel as ordinals and use a
> KERNEL_HWCAP_ prefix. This allows us to support automatic feature
> based module loading for all our hwcaps.
>
> We introduce cpu_set_feature to set hwcaps which complements the
> existing cpu_have_feature helper. These helpers allow us to clean
> up existing direct uses of elf_hwcap and reduce any future effort
> required to move beyond 64 caps.
>
> For convenience we also introduce cpu_{have,set}_feature_name which
> makes use of the cpu_feature macro to allow providing a hwcap name
> without a {KERNEL_}HWCAP_ prefix.
>
> Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> ---
>  arch/arm64/crypto/aes-ce-ccm-glue.c      |  2 +-
>  arch/arm64/crypto/aes-neonbs-glue.c      |  2 +-
>  arch/arm64/crypto/chacha-neon-glue.c     |  2 +-
>  arch/arm64/crypto/crct10dif-ce-glue.c    |  2 +-
>  arch/arm64/crypto/ghash-ce-glue.c        |  6 +--
>  arch/arm64/crypto/nhpoly1305-neon-glue.c |  2 +-
>  arch/arm64/crypto/sha256-glue.c          |  4 +-
>  arch/arm64/include/asm/cpufeature.h      | 19 ++++++---
>  arch/arm64/include/asm/hwcap.h           | 40 ++++++++++++++++++-
>  arch/arm64/include/uapi/asm/hwcap.h      |  2 +-
>  arch/arm64/kernel/cpufeature.c           | 66 ++++++++++++++++----------------
>  arch/arm64/kernel/cpuinfo.c              |  2 +-
>  arch/arm64/kernel/fpsimd.c               |  4 +-
>  drivers/clocksource/arm_arch_timer.c     |  8 ++++
>  14 files changed, 108 insertions(+), 53 deletions(-)

[...]

> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> index dfcfba7..dd21a32 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -17,12 +17,12 @@
>  /*
>   * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
>   * in the kernel and for user space to keep track of which optional features
> - * are supported by the current system. So let's map feature 'x' to HWCAP_x.
> - * Note that HWCAP_x constants are bit fields so we need to take the log.
> + * are supported by the current system. So let's map feature 'x' to
> + * KERNEL_HWCAP_x.

This doesn't read quite right now.

The purpose of this paragraph seems to be that the kernel and user
views have the same encoding, so we can just map x to UAPI HWCAP_x
definition.

This isn't true any more: we now consider the kernel (in elf_hwcap) and
user encodings (in AT_HWCAP) distinct, but for backwards compatibility
reasons HWCAP_x == BIT(KERNEL_HWCAP_x) for the first 32 hwcaps.
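
To illustrate, with the values this series uses for the first hwcap and
the first AT_HWCAP2 hwcap:

	HWCAP_FP             == (1 << 0)  /* AT_HWCAP bit 0 */
	KERNEL_HWCAP_FP      == 0         /* ordinal into elf_hwcap */
	HWCAP2_DCPODP        == (1 << 0)  /* AT_HWCAP2 bit 0 */
	KERNEL_HWCAP_DCPODP  == 32        /* ordinal into elf_hwcap */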

So it would be worth rewriting this whole comment to describe the new
situation, rather than just tweaking it.


I'm not sure whether it is more natural to put the comment in
<asm/hwcap.h> or here: you could put it in one place with a brief
pointer in the other.

>   */
>
> -#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
> -#define cpu_feature(x)		ilog2(HWCAP_ ## x)
> +#define MAX_CPU_FEATURES	64
> +#define cpu_feature(x)		(KERNEL_HWCAP_ ## x)
>
>  #ifndef __ASSEMBLY__
>
> @@ -396,10 +396,19 @@ extern struct static_key_false arm64_const_caps_ready;
>
>  bool this_cpu_has_cap(unsigned int cap);
>
> +static inline void cpu_set_feature(unsigned int num)
> +{
> +	WARN_ON(num >= MAX_CPU_FEATURES);
> +	elf_hwcap |= BIT(num);
> +}
> +#define cpu_set_feature_name(name) cpu_set_feature(cpu_feature(name))
> +
>  static inline bool cpu_have_feature(unsigned int num)
>  {
> -	return elf_hwcap & (1UL << num);
> +	WARN_ON(num >= MAX_CPU_FEATURES);
> +	return elf_hwcap & BIT(num);
>  }
> +#define cpu_have_feature_name(name) cpu_have_feature(cpu_feature(name))

This is bikeshedding, but I'd say that CPUs don't have (feature) names;
they have features (which have names).

So I might prefer to call these something like:

cpu_set_named_feature()
cpu_have_named_feature()

(May not be worth it unless you respin the series for another reason
though.)

>  /* System capability check for constant caps */
>  static inline bool __cpus_have_const_cap(int num)
> diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
> index 400b80b..7549c72 100644
> --- a/arch/arm64/include/asm/hwcap.h
> +++ b/arch/arm64/include/asm/hwcap.h
> @@ -39,12 +39,50 @@
>  #define COMPAT_HWCAP2_SHA2	(1 << 3)
>  #define COMPAT_HWCAP2_CRC32	(1 << 4)
>
> +/*
> + * KERNEL_HWCAP flags - for elf_hwcap (in kernel)
> + */
> +#define KERNEL_HWCAP_FP			ilog2(HWCAP_FP)
> +#define KERNEL_HWCAP_ASIMD		ilog2(HWCAP_ASIMD)

[...]

> +#define KERNEL_HWCAP_SB			ilog2(HWCAP_SB)
> +#define KERNEL_HWCAP_PACA		ilog2(HWCAP_PACA)
> +#define KERNEL_HWCAP_PACG		ilog2(HWCAP_PACG)
> +#define KERNEL_HWCAP_DCPODP		(ilog2(HWCAP2_DCPODP) + 32)

For ABI purposes, we should take the opportunity to document the status
of the currently unused bits.

For interoperation with the glibc ifunc resolver ABI, we may want to
reserve a bit among AT_HWCAP [63:32] or AT_HWCAP2 [31:0] that will
never be used by the kernel and always passed to userspace as 0.

I'm envisaging code such as

	foo resolver(unsigned long hwcaps, unsigned int num_at_hwcaps,
			unsigned long const *at_hwcaps)
	{
		if ((hwcaps & _GLIBC_EXTRA_HWCAPS) &&
				num_at_hwcaps >= 2 &&
				at_hwcaps[1] & HWCAP2_FOO)
			/* feature present */
	}

We would need that _GLIBC_EXTRA_HWCAPS to distinguish the second and
third arguments from uninitialised junk that would be passed by older
glibc versions.

Glibc might or might not choose to try and wedge AT_HWCAP2 in the top
bits of the first argument instead of bits [63:32] of AT_HWCAP (which
we expect to be zero for now, but could still be made reachable via the
at_hwcaps pointer).

Coordination would be needed if glibc carries on using the
<uapi/asm/hwcap.h> HWCAP{,2}_foo defines for here while doing tricks
of this sort.

Szabolcs may have a view on whether this is needed / useful.


If so, we should document any required guarantees now so that we don't
accidentally violate them during future maintenance.  For the benefit
of userspace folks, it may be a good idea to have some clear statement
in Documentation/arm64/ also.

Because of the ABI implications here, it would also be a good idea to copy
the libc-alpha mailing list, and possibly also linux-api.

> +
>  #ifndef __ASSEMBLY__
>  /*
>   * This yields a mask that user programs can use to figure out what
>   * instruction set this cpu supports.
>   */
> -#define ELF_HWCAP		(elf_hwcap)
> +#define ELF_HWCAP		lower_32_bits(elf_hwcap)
> +#define ELF_HWCAP2		upper_32_bits(elf_hwcap)

Should we have #include <linux/kernel.h> here somewhere?

[...]

> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index 9a7d4dc..4e8d3b4 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -778,7 +778,11 @@ static void arch_timer_evtstrm_enable(int divider)
>  	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
>  			| ARCH_TIMER_VIRT_EVT_EN;
>  	arch_timer_set_cntkctl(cntkctl);
> +#ifdef CONFIG_ARM64
> +	cpu_set_feature_name(EVTSTRM);
> +#else
>  	elf_hwcap |= HWCAP_EVTSTRM;
> +#endif

This is a little nasty.

You could give arch/arm its own cpu_set_feature_name() (or whatever it's
called), depending on how keen Russell is to pick it up.

Or for this particular case, stick with the old code (which we now get
away with because elf_hwcap is still exported).  The case of an
HWCAP_foo flag name that happens to have the same semantics on both arm
and arm64 is a pretty esoteric one, so it's not the end of the world if
the standard helpers don't deal with it.

To avoid future accidents you could replace the #ifdef with a comment at
each site (which will also allow people to track down this patch when
looking at that code).
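
e.g. something along these lines (sketch only):

	/*
	 * This code is shared with 32-bit arm, so keep using elf_hwcap
	 * directly here rather than the arm64-only helpers; see the
	 * arm64 AT_HWCAP2 series for the background.
	 */
	elf_hwcap |= HWCAP_EVTSTRM;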

>  #ifdef CONFIG_COMPAT
>  	compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
>  #endif
> @@ -1000,7 +1004,11 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self,
>  	} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
>  		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
>
> +#ifdef CONFIG_ARM64
> +		if (cpu_have_feature_name(EVTSTRM))
> +#else
>  		if (elf_hwcap & HWCAP_EVTSTRM)
> +#endif

Ditto.

[...]

Cheers
---Dave
Szabolcs Nagy Feb. 22, 2019, 10:35 a.m. UTC | #2
On 21/02/2019 18:45, Dave P Martin wrote:
> For ABI purposes, we should take the opportunity to document the status
> of the currently unused bits.
> 
> For interoperation with the glibc ifunc resolver ABI, we may want to
> reserve a bit among AT_HWCAP [63:32] or AT_HWCAP2 [31:0] that will
> never be used by the kernel and always passed to userspace as 0.

if hwcap2 is introduced at bit 32, i'd expect the top 32 bits
of hwcap1 to be 0 on lp64 (and thus libc may use those bits
for something internally or in the ifunc abi).

> I'm envisaging code such as
> 
> 	foo resolver(unsigned long hwcaps, unsigned int num_at_hwcaps,
> 			unsigned long const *at_hwcaps)
> 	{
> 		if ((hwcaps & _GLIBC_EXTRA_HWCAPS) &&
> 				num_at_hwcaps >= 2 &&
> 				at_hwcaps[1] & HWCAP2_FOO)
> 			/* feature present */
> 	}
> 
> We would need that _GLIBC_EXTRA_HWCAPS to distinguish the second and
> third arguments from uninitialised junk that would be passed by older
> glibc versions.

yes, i plan to do such a change.

> Glibc might or might not choose to try and wedge AT_HWCAP2 in the top
> bits of the first argument instead of bits [63:32] of AT_HWCAP (which
> we expect to be zero for now, but could still be made reachable via the
> at_hwcaps pointer).

the public api via getauxval and hwcap macro defines
will not do tricks that break lp64 vs ilp32 portability.
(that would defeat the purpose of hwcap2)

> Coordination would be needed if glibc carries on using the
> <uapi/asm/hwcap.h> HWCAP{,2}_foo defines for here while doing tricks
> of this sort.
> 
> Szabolcs may have a view on whether this is needed / useful.

for now coordination is not needed, glibc does not
use uapi hwcap.h directly, but copies it and it won't
do tricks that change hwcap values anyway, only adds
one new flag for ifunc resolvers.

> If so, we should document any required guarantees now so that we don't
> accidentally violate them during future maintenance.  For the benefit
> of userspace folks, it may be a good idea to have some clear statement
> in Documentation/arm64/ also.

on lp64 glibc will expect hwcap top 32bit to be 0.
this can be changed in the future if we no longer
care about ilp32 and at that point we may need
some coordination between linux and glibc so
the ifunc resolver abi does not break.

> Because of the ABI implications here, it would also be a good idea to copy
> the libc-alpha mailing list, and possibly also linux-api.

yes.
Andrew Murray March 27, 2019, 2:53 p.m. UTC | #3
On Thu, Feb 21, 2019 at 06:45:03PM +0000, Dave P Martin wrote:
> On Thu, Feb 21, 2019 at 12:20:53PM +0000, Andrew Murray wrote:
> > As we will exhaust the first 32 bits of AT_HWCAP let's start
> > exposing AT_HWCAP2 to userspace to give us up to 64 caps.
> > 
> > Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we
> > prefer to expand into AT_HWCAP2 in order to provide a consistent
> > view to userspace between ILP32 and LP64. However internal to the
> > kernel we prefer to continue to use the full space of elf_hwcap.
> > 
> > To reduce complexity and allow for future expansion, we now
> > represent hwcaps in the kernel as ordinals and use a
> > KERNEL_HWCAP_ prefix. This allows us to support automatic feature
> > based module loading for all our hwcaps.
> > 
> > We introduce cpu_set_feature to set hwcaps which complements the
> > existing cpu_have_feature helper. These helpers allow us to clean
> > up existing direct uses of elf_hwcap and reduce any future effort
> > required to move beyond 64 caps.
> > 
> > For convenience we also introduce cpu_{have,set}_feature_name which
> > makes use of the cpu_feature macro to allow providing a hwcap name
> > without a {KERNEL_}HWCAP_ prefix.
> > 
> > Signed-off-by: Andrew Murray <andrew.murray@arm.com>
> > ---
> >  arch/arm64/crypto/aes-ce-ccm-glue.c      |  2 +-
> >  arch/arm64/crypto/aes-neonbs-glue.c      |  2 +-
> >  arch/arm64/crypto/chacha-neon-glue.c     |  2 +-
> >  arch/arm64/crypto/crct10dif-ce-glue.c    |  2 +-
> >  arch/arm64/crypto/ghash-ce-glue.c        |  6 +--
> >  arch/arm64/crypto/nhpoly1305-neon-glue.c |  2 +-
> >  arch/arm64/crypto/sha256-glue.c          |  4 +-
> >  arch/arm64/include/asm/cpufeature.h      | 19 ++++++---
> >  arch/arm64/include/asm/hwcap.h           | 40 ++++++++++++++++++-
> >  arch/arm64/include/uapi/asm/hwcap.h      |  2 +-
> >  arch/arm64/kernel/cpufeature.c           | 66 ++++++++++++++++----------------
> >  arch/arm64/kernel/cpuinfo.c              |  2 +-
> >  arch/arm64/kernel/fpsimd.c               |  4 +-
> >  drivers/clocksource/arm_arch_timer.c     |  8 ++++
> >  14 files changed, 108 insertions(+), 53 deletions(-)
> 
> [...]
> 
> > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
> > index dfcfba7..dd21a32 100644
> > --- a/arch/arm64/include/asm/cpufeature.h
> > +++ b/arch/arm64/include/asm/cpufeature.h
> > @@ -17,12 +17,12 @@
> >  /*
> >   * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
> >   * in the kernel and for user space to keep track of which optional features
> > - * are supported by the current system. So let's map feature 'x' to HWCAP_x.
> > - * Note that HWCAP_x constants are bit fields so we need to take the log.
> > + * are supported by the current system. So let's map feature 'x' to
> > + * KERNEL_HWCAP_x.
> 
> This doesn't read quite right now.
> 
> The purpose of this paragraph seems to be that the kernel and user
> views have the same encoding, so we can just map x to UAPI HWCAP_x
> definition.
> 
> This isn't true any more: we now consider the kernel (in elf_hwcap) and
> user encodings (in AT_HWCAP) distinct, but for backwards compatibility
> reasons HWCAP_x == BIT(KERNEL_HWCAP_x) for the first 32 hwcaps.
> 
> So it would be worth rewriting this whole comment to describe the new
> situation, rather than just tweaking it.
> 
> 
> I'm not sure whether it is more natural to put the comment in
> <asm/hwcap.h> or here: you could put it in one place with a brief
> pointer in the other.

How about the following description in asm/hwcap.h (subject to the outcome of
the helpers discussion)? The (non-uapi) hwcap.h seems the right place for this
as it's where you'd go to find out what the HWCAPs are called.

 /*
- * KERNEL_HWCAP flags - for elf_hwcap (in kernel)
+ * For userspace we represent hwcaps as a collection of HWCAP{,2}_x bitfields
+ * as described in uapi/asm/hwcap.h. For the kernel we represent hwcaps as
+ * natural numbers (in a single range of size MAX_CPU_FEATURES) defined here
+ * with prefix KERNEL_HWCAP_ mapped to their HWCAP{,2}_x counterpart.
+ *
+ * Hwcaps should be set and tested within the kernel via the
+ * cpu_{set,have}_named_feature(feature) where feature is the unique suffix
+ * of KERNEL_HWCAP_{feature}.
  */

And I'd be tempted to completely remove the comment in cpufeature.h - this
relates to the mapping of cpu_feature(x) to KERNEL_HWCAP_, which is linear and
self-explanatory?

> 
> >   */
> >  
> > -#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
> > -#define cpu_feature(x)		ilog2(HWCAP_ ## x)
> > +#define MAX_CPU_FEATURES	64
> > +#define cpu_feature(x)		(KERNEL_HWCAP_ ## x)
> >  
> >  #ifndef __ASSEMBLY__
> >  
> > @@ -396,10 +396,19 @@ extern struct static_key_false arm64_const_caps_ready;
> >  
> >  bool this_cpu_has_cap(unsigned int cap);
> >  
> > +static inline void cpu_set_feature(unsigned int num)
> > +{
> > +	WARN_ON(num >= MAX_CPU_FEATURES);
> > +	elf_hwcap |= BIT(num);
> > +}
> > +#define cpu_set_feature_name(name) cpu_set_feature(cpu_feature(name))
> > +
> >  static inline bool cpu_have_feature(unsigned int num)
> >  {
> > -	return elf_hwcap & (1UL << num);
> > +	WARN_ON(num >= MAX_CPU_FEATURES);
> > +	return elf_hwcap & BIT(num);
> >  }
> > +#define cpu_have_feature_name(name) cpu_have_feature(cpu_feature(name))
> 
> This is bikeshedding, but I'd say that CPUs don't have (feature) names;
> they have features (which have names).
> 
> So I might prefer to call these something like:
> 
> 	cpu_set_named_feature()
> 	cpu_have_named_feature()
> 
> (May not be worth it unless you respin the series for another reason
> though.)

I'll make this change, thanks for the suggestion.
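
With that rename, the helpers would read roughly as follows (the code
from this patch with only the macro names changed; sketch):

	static inline void cpu_set_feature(unsigned int num)
	{
		WARN_ON(num >= MAX_CPU_FEATURES);
		elf_hwcap |= BIT(num);
	}
	#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))

	static inline bool cpu_have_feature(unsigned int num)
	{
		WARN_ON(num >= MAX_CPU_FEATURES);
		return elf_hwcap & BIT(num);
	}
	#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))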

> 
> >  /* System capability check for constant caps */
> >  static inline bool __cpus_have_const_cap(int num)
> > diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
> > index 400b80b..7549c72 100644
> > --- a/arch/arm64/include/asm/hwcap.h
> > +++ b/arch/arm64/include/asm/hwcap.h
> > @@ -39,12 +39,50 @@
> >  #define COMPAT_HWCAP2_SHA2	(1 << 3)
> >  #define COMPAT_HWCAP2_CRC32	(1 << 4)
> >  
> > +/*
> > + * KERNEL_HWCAP flags - for elf_hwcap (in kernel)
> > + */
> > +#define KERNEL_HWCAP_FP			ilog2(HWCAP_FP)
> > +#define KERNEL_HWCAP_ASIMD		ilog2(HWCAP_ASIMD)
> 
> [...]
> 
> > +#define KERNEL_HWCAP_SB			ilog2(HWCAP_SB)
> > +#define KERNEL_HWCAP_PACA		ilog2(HWCAP_PACA)
> > +#define KERNEL_HWCAP_PACG		ilog2(HWCAP_PACG)
> > +#define KERNEL_HWCAP_DCPODP		(ilog2(HWCAP2_DCPODP) + 32)
> 
> For ABI purposes, we should take the opportunity to document the status
> of the currently unused bits.
> 
> For interoperation with the glibc ifunc resolver ABI, we may want to
> reserve a bit among AT_HWCAP [63:32] or AT_HWCAP2 [31:0] that will
> never be used by the kernel and always passed to userspace as 0.
> 
> I'm envisaging code such as
> 
> 	foo resolver(unsigned long hwcaps, unsigned int num_at_hwcaps,
> 			unsigned long const *at_hwcaps)
> 	{
> 		if ((hwcaps & _GLIBC_EXTRA_HWCAPS) &&
> 				num_at_hwcaps >= 2 &&
> 				at_hwcaps[1] && HWCAP2_FOO)
> 			/* feature present */
> 	}
> 
> We would need that _GLIBC_EXTRA_HWCAPS to distinguish the second and
> third arguments from uninitialised junk that would be passed by older
> glibc versions.
> 
> Glibc might or might not choose to try and wedge AT_HWCAP2 in the top
> bits of the first argument instead of bits [63:32] of AT_HWCAP (which
> we expect to be zero for now, but could still be made reachable via the
> at_hwcaps pointer).
> 
> Coordination would be needed if glibc carries on using the
> <uapi/asm/hwcap.h> HWCAP{,2}_foo defines for here while doing tricks
> of this sort.
> 
> Szabolcs may have a view on whether this is needed / useful.
> 
> 
> If so, we should document any required guarantees now so that we don't
> accidentally violate them during future maintenance.  For the benefit
> of userspace folks, it may be a good idea to have some clear statement
> in Documentation/arm64/ also.
> 
> Because of the ABI implications here, it would also be a good idea to copy
> the libc-alpha mailing list, and possibly also linux-api.
> 
> > +
> >  #ifndef __ASSEMBLY__
> >  /*
> >   * This yields a mask that user programs can use to figure out what
> >   * instruction set this cpu supports.
> >   */
> > -#define ELF_HWCAP		(elf_hwcap)
> > +#define ELF_HWCAP		lower_32_bits(elf_hwcap)
> > +#define ELF_HWCAP2		upper_32_bits(elf_hwcap)
> 
> Should we have #include <linux/kernel.h> here somewhere?

Yes, I'll have to add it after the #ifndef __ASSEMBLY__ otherwise it seems
to break anything that includes kernel.h from .S files.
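
i.e. roughly (sketch):

	#ifndef __ASSEMBLY__
	#include <linux/kernel.h>	/* for lower_32_bits()/upper_32_bits() */

	/*
	 * This yields a mask that user programs can use to figure out what
	 * instruction set this cpu supports.
	 */
	#define ELF_HWCAP		lower_32_bits(elf_hwcap)
	#define ELF_HWCAP2		upper_32_bits(elf_hwcap)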

> 
> [...]
> 
> > diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> > index 9a7d4dc..4e8d3b4 100644
> > --- a/drivers/clocksource/arm_arch_timer.c
> > +++ b/drivers/clocksource/arm_arch_timer.c
> > @@ -778,7 +778,11 @@ static void arch_timer_evtstrm_enable(int divider)
> >  	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
> >  			| ARCH_TIMER_VIRT_EVT_EN;
> >  	arch_timer_set_cntkctl(cntkctl);
> > +#ifdef CONFIG_ARM64
> > +	cpu_set_feature_name(EVTSTRM);
> > +#else
> >  	elf_hwcap |= HWCAP_EVTSTRM;
> > +#endif
> 
> This is a little nasty.
> 
> You could give arch/arm its own cpu_set_feature_name() (or whatever it's
> called), depending on how keen Russell is to pick it up.
> 
> Or for this particular case, stick with the old code (which we now get
> away with because elf_hwcap is still exported).  The case of an
> HWCAP_foo flag name that happens to have the same semantics on both arm
> and arm64 is a pretty esoteric one, so it's not the end of the world if
> the standard helpers don't deal with it.

Assuming we don't drop the encapsulate patch, I guess we could leave this
as it is and look at adding the ARM32 helper at a later time?
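
(If we did add one later, a hypothetical arch/arm equivalent could be as
simple as the sketch below - names assumed, not part of this series.)

	/* arch/arm/include/asm/hwcap.h (hypothetical sketch) */
	#define cpu_set_named_feature(name)	(elf_hwcap |= HWCAP_ ## name)
	#define cpu_have_named_feature(name)	(!!(elf_hwcap & HWCAP_ ## name))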

> 
> To avoid future accidents you could replace the #ifdef with a comment at
> each site (which will also allow people to track down this patch when
> looking at that code).
> 
> >  #ifdef CONFIG_COMPAT
> >  	compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
> >  #endif
> > @@ -1000,7 +1004,11 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self,
> >  	} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
> >  		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
> >  
> > +#ifdef CONFIG_ARM64
> > +		if (cpu_have_feature_name(EVTSTRM))
> > +#else
> >  		if (elf_hwcap & HWCAP_EVTSTRM)
> > +#endif
> 
> Ditto.

Thanks,

Andrew Murray

> 
> [...]
> 
> Cheers
> ---Dave
Andrew Murray March 27, 2019, 3:02 p.m. UTC | #4
On Fri, Feb 22, 2019 at 10:35:01AM +0000, Szabolcs Nagy wrote:
> On 21/02/2019 18:45, Dave P Martin wrote:
> > For ABI purposes, we should take the opportunity to document the status
> > of the currently unused bits.
> > 
> > For interoperation with the glibc ifunc resolver ABI, we may want to
> > reserve a bit among AT_HWCAP [63:32] or AT_HWCAP2 [31:0] that will
> > never be used by the kernel and always passed to userspace as 0.
> 
> if hwcap2 is introduced at bit 32, i'd expect the top 32 bits
> of hwcap1 to be 0 on lp64 (and thus libc may use those bits
> for something internally or in the ifunc abi).
> 
> > I'm envisaging code such as
> > 
> > 	foo resolver(unsigned long hwcaps, unsigned int num_at_hwcaps,
> > 			unsigned long const *at_hwcaps)
> > 	{
> > 		if ((hwcaps & _GLIBC_EXTRA_HWCAPS) &&
> > 				num_at_hwcaps >= 2 &&
> > 				at_hwcaps[1] & HWCAP2_FOO)
> > 			/* feature present */
> > 	}
> > 
> > We would need that _GLIBC_EXTRA_HWCAPS to distinguish the second and
> > third arguments from uninitialised junk that would be passed by older
> > glibc versions.
> 
> yes, i plan to do such a change.
> 
> > Glibc might or might not choose to try and wedge AT_HWCAP2 in the top
> > bits of the first argument instead of bits [63:32] of AT_HWCAP (which
> > we expect to be zero for now, but could still be made reachable via the
> > at_hwcaps pointer).
> 
> the public api via getauxval and hwcap macro defines
> will not do tricks that break lp64 vs ilp32 portability.
> (that would defeat the purpose of hwcap2)
> 
> > Coordination would be needed if glibc carries on using the
> > <uapi/asm/hwcap.h> HWCAP{,2}_foo defines for here while doing tricks
> > of this sort.
> > 
> > Szabolcs may have a view on whether this is needed / useful.
> 
> for now coordination is not needed, glibc does not
> use uapi hwcap.h directly, but copies it and it won't
> do tricks that change hwcap values anyway, only adds
> one new flag for ifunc resolvers.
> 
> > If so, we should document any required guarantees now so that we don't
> > accidentally violate them during future maintenance.  For the benefit
> > of userspace folks, it may be a good idea to have some clear statement
> > in Documentation/arm64/ also.
> 
> on lp64 glibc will expect hwcap top 32bit to be 0.
> this can be changed in the future if we no longer
> care about ilp32 and at that point we may need
> some coordination between linux and glibc so
> the ifunc resolver abi does not break.
> 
> > Because of the ABI implications here, it would also be a good idea to copy
> > the libc-alpha mailing list, and possibly also linux-api.
> 
> yes.

I'll add documentation to Documentation/arm64 to indicate that the upper 32bits
of AT_HWCAP will always be 0. Is this correct? 

Thanks,

Andrew Murray
Andrew Murray March 27, 2019, 3:24 p.m. UTC | #5
On Wed, Mar 27, 2019 at 03:02:25PM +0000, Andrew Murray wrote:
> On Fri, Feb 22, 2019 at 10:35:01AM +0000, Szabolcs Nagy wrote:
> > On 21/02/2019 18:45, Dave P Martin wrote:
> > > For ABI purposes, we should take the opportunity to document the status
> > > of the currently unused bits.
> > > 
> > > For interoperation with the glibc ifunc resolver ABI, we may want to
> > > reserve a bit among AT_HWCAP [63:32] or AT_HWCAP2 [31:0] that will
> > > never be used by the kernel and always passed to userspace as 0.
> > 
> > if hwcap2 is introduced at bit 32, i'd expect the top 32 bits
> > of hwcap1 to be 0 on lp64 (and thus libc may use those bits
> > for something internally or in the ifunc abi).
> > 
> > > I'm envisaging code such as
> > > 
> > > 	foo resolver(unsigned long hwcaps, unsigned int num_at_hwcaps,
> > > 			unsigned long const *at_hwcaps)
> > > 	{
> > > 		if ((hwcaps & _GLIBC_EXTRA_HWCAPS) &&
> > > 				num_at_hwcaps >= 2 &&
> > > 				at_hwcaps[1] & HWCAP2_FOO)
> > > 			/* feature present */
> > > 	}
> > > 
> > > We would need that _GLIBC_EXTRA_HWCAPS to distinguish the second and
> > > third arguments from uninitialised junk that would be passed by older
> > > glibc versions.
> > 
> > yes, i plan to do such a change.
> > 
> > > Glibc might or might not choose to try and wedge AT_HWCAP2 in the top
> > > bits of the first argument instead of bits [63:32] of AT_HWCAP (which
> > > we expect to be zero for now, but could still be made reachable via the
> > > at_hwcaps pointer).
> > 
> > the public api via getauxval and hwcap macro defines
> > will not do tricks that break lp64 vs ilp32 portability.
> > (that would defeat the purpose of hwcap2)
> > 
> > > Coordination would be needed if glibc carries on using the
> > > <uapi/asm/hwcap.h> HWCAP{,2}_foo defines for here while doing tricks
> > > of this sort.
> > > 
> > > Szabolcs may have a view on whether this is needed / useful.
> > 
> > for now coordination is not needed, glibc does not
> > use uapi hwcap.h directly, but copies it and it won't
> > do tricks that change hwcap values anyway, only adds
> > one new flag for ifunc resolvers.
> > 
> > > If so, we should document any required guarantees now so that we don't
> > > accidentally violate them during future maintenance.  For the benefit
> > > of userspace folks, it may be a good idea to have some clear statement
> > > in Documentation/arm64/ also.
> > 
> > on lp64 glibc will expect hwcap top 32bit to be 0.
> > this can be changed in the future if we no longer
> > care about ilp32 and at that point we may need
> > some coordination between linux and glibc so
> > the ifunc resolver abi does not break.
> > 
> > > Because of the ABI implications here, it would also be a good idea to copy
> > > the libc-alpha mailing list, and possibly also linux-api.
> > 
> > yes.
> 
> I'll add documentation to Documentation/arm64 to indicate that the upper 32bits
> of AT_HWCAP will always be 0. Is this correct? 

How about this (in Documentation/arm64/elf_hwcaps.txt)?

+
+
+4. Unused AT_HWCAP bits
+-----------------------
+
+Each AT_HWCAP and AT_HWCAP2 entry provides for up to 32 hwcaps contained
+in bits [31:0]. For interoperation with userspace we guarantee that the
+top bits [63:32] of AT_HWCAP will always be returned as 0.
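
A minimal userspace consumer of the new word could then look like this
(sketch; assumes glibc's getauxval() and the HWCAP2_DCPODP define from
this series):

	#include <sys/auxv.h>
	#include <asm/hwcap.h>

	static int have_dcpodp(void)
	{
		/* getauxval() returns 0 for unknown types, so this is
		   safe on kernels that predate AT_HWCAP2 */
		return !!(getauxval(AT_HWCAP2) & HWCAP2_DCPODP);
	}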

Thanks,

Andrew Murray

> 
> Thanks,
> 
> Andrew Murray
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Dave Martin March 28, 2019, 11:27 a.m. UTC | #6
On Wed, Mar 27, 2019 at 03:24:15PM +0000, Andrew Murray wrote:
> On Wed, Mar 27, 2019 at 03:02:25PM +0000, Andrew Murray wrote:

[...]

> > I'll add documentation to Documentation/arm64 to indicate that the upper 32bits
> > of AT_HWCAP will always be 0. Is this correct? 
> 
> How about this (in Documentation/arm64/elf_hwcaps.txt)?
> 
> +
> +
> +4. Unused AT_HWCAP bits
> +-----------------------
> +
> +Each AT_HWCAP and AT_HWCAP2 entry provides for up to 32 hwcaps contained
> +in bits [31:0]. For interoperation with userspace we guarantee that the
> +top bits [63:32] of AT_HWCAP will always be returned as 0.

Since the main reason for reserving bits [63:32] is ILP32, and it's
still unclear when (or if) that will be merged, it feels a bit excessive
to promise that we will never use these bits.

It sounds like glibc has a use for at most one bit in here.

So maybe we can reserve bit 63 (or 32, whatever) and promise that it is
zero, but leave the rest uncommitted for now.

Szabolcs, does that sound sensible?

Cheers
---Dave
Dave Martin March 29, 2019, 3:39 p.m. UTC | #7
On Thu, Feb 21, 2019 at 12:20:53PM +0000, Andrew Murray wrote:
> As we will exhaust the first 32 bits of AT_HWCAP let's start
> exposing AT_HWCAP2 to userspace to give us up to 64 caps.
> 
> Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we
> prefer to expand into AT_HWCAP2 in order to provide a consistent
> view to userspace between ILP32 and LP64. However internal to the
> kernel we prefer to continue to use the full space of elf_hwcap.
> 
> To reduce complexity and allow for future expansion, we now
> represent hwcaps in the kernel as ordinals and use a
> KERNEL_HWCAP_ prefix. This allows us to support automatic feature
> based module loading for all our hwcaps.
> 
> We introduce cpu_set_feature to set hwcaps which complements the
> existing cpu_have_feature helper. These helpers allow us to clean
> up existing direct uses of elf_hwcap and reduce any future effort
> required to move beyond 64 caps.
> 
> For convenience we also introduce cpu_{have,set}_feature_name which
> makes use of the cpu_feature macro to allow providing a hwcap name
> without a {KERNEL_}HWCAP_ prefix.

[...]

Btw, this series will also need to update Documentation/elf_hwcaps.txt.

(You may have been planning to do that anyway, but I just realised.)

Cheers
---Dave
Szabolcs Nagy March 29, 2019, 4:44 p.m. UTC | #8
On 28/03/2019 11:27, Dave Martin wrote:
> On Wed, Mar 27, 2019 at 03:24:15PM +0000, Andrew Murray wrote:
>> On Wed, Mar 27, 2019 at 03:02:25PM +0000, Andrew Murray wrote:
>>> I'll add documentation to Documentation/arm64 to indicate that the upper 32bits
>>> of AT_HWCAP will always be 0. Is this correct? 
>>
>> How about this (in Documentation/arm64/elf_hwcaps.txt)?
>> +
>> +
>> +4. Unused AT_HWCAP bits
>> +-----------------------
>> +
>> +Each AT_HWCAP and AT_HWCAP2 entry provides for up to 32 hwcaps contained
>> +in bits [31:0]. For interoperation with userspace we guarantee that the
>> +top bits [63:32] of AT_HWCAP will always be returned as 0.
> 
> Since the main reason for reserving bits [63:32] is ILP32, and it's
> still unclear when (or if) that will be merged, it feels a bit excessive
> to promise that we will never use these bits.
> 
> It sounds like glibc has a use for at most one bit in here.
> 
> So maybe we can reserve bit 63 (or 32, whatever) and promise that it is
> zero, but leave the rest uncommitted for now.
> 
> Szabolcs, does that sound sensible?

i think hwcap bit 63 is already reserved by glibc
internally for tls support, it is not clear to me
if that's still relevant (aarch64 post-dates tls
support, so this might be historical cruft that can
be cleaned up), i only see one comment about it:

1288   /* The last entry in hwcap_extra is reserved for the "tls" pseudo-hwcap which
1289      indicates support for TLS.  This pseudo-hwcap is only used by old versions
1290      under which TLS support was optional.  The entry is no longer needed, but
1291      must remain for compatibility.  */
1292   hwcap_extra[63 - _DL_FIRST_EXTRA] = "tls";

https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/ldconfig.c;h=3bc9e618916ebb2fee29ffe3d114525a08390b43;hb=HEAD#l1288

and generic ld.so.cache handling code uses it:

https://sourceware.org/git/?p=glibc.git;a=blob;f=elf/dl-cache.c;h=d8d1e2344e612d98689cf7d7ad965822d0ab6ed1;hb=HEAD#l265

since i don't understand how this tls bit was used
exactly i think it's better to use a different bit
for aarch64 ifunc abi hacks (e.g. 1ULL << 62)

cc += libc-alpha in case somebody knows more about
this bit.
Phil Blundell March 29, 2019, 4:57 p.m. UTC | #9
On Fri, 2019-03-29 at 16:44 +0000, Szabolcs Nagy wrote:
> i think hwcap bit 63 is already reserved by glibc
> internally for tls support, it is not clear to me
> if that's still relevant (aarch64 post-dates tls
> support, so this might be historical cruft that can
> be cleaned up), i only see one comment about it:

At the time that bit was invented, it was only set when glibc was built
with TLS support enabled (i.e. USE_TLS defined).  It would cause the
dynamic linker to add "tls" as a hwcap subdirectory in its search path, which I guess was useful in the days when TLS-enabled and non-TLS-enabled versions of the same DSO needed to be able to co-exist.

At some later date, I forget when, TLS became mandatory and the USE_TLS
condition went away.  The comment seems to be saying that it was
thought necessary to keep the pseudo-hwcap bit set so that any DSOs in
a "tls" subdirectory would still be found.  That seems like a debatable proposition even at the time, and even more so today, but I guess it was hard to prove the negative.

Even if aarch64 has always had TLS, in principle there might be some
ancient DSOs lurking in "tls" subdirectories which would no longer be
loaded if you were to stop setting that bit.  I'm sure that would be manageable, but equally it doesn't seem that these bits are so scarce that it's worth going to any effort to scavenge old ones.

> i think it's better to use a different bit
> for aarch64 ifunc abi hacks (e.g. 1ULL << 62)

I agree.

p.
Andrew Murray April 1, 2019, 8:14 a.m. UTC | #10
On Fri, Mar 29, 2019 at 04:57:46PM +0000, Phil Blundell wrote:
> On Fri, 2019-03-29 at 16:44 +0000, Szabolcs Nagy wrote:
> > i think hwcap bit 63 is already reserved by glibc
> > internally for tls support, it is not clear to me
> > if that's still relevant (aarch64 post-dates tls
> > support, so this might be historical cruft that can
> > be cleaned up), i only see one comment about it:
> 
> At the time that bit was invented, it was only set when glibc was built
> with TLS support enabled (i.e. USE_TLS defined).  It would cause the
> dynamic linker to add "tls" as a hwcap subdirectory in its search path, which I guess was useful in the days when TLS-enabled and non-TLS-enabled versions of the same DSO needed to be able to co-exist.
> 
> At some later date, I forget when, TLS became mandatory and the USE_TLS
> condition went away.  The comment seems to be saying that it was
> thought necessary to keep the pseudo-hwcap bit set so that any DSOs in
> a "tls" subdirectory would still be found.  That seems like a debatable proposition even at the time, and even more so today, but I guess it was hard to prove the negative.
> 
> Even if aarch64 has always had TLS, in principle there might be some
> ancient DSOs lurking in "tls" subdirectories which would no longer be
> loaded if you were to stop setting that bit.  I'm sure that would be manageable, but equally it doesn't seem that these bits are so scarce that it's worth going to any effort to scavenge old ones.
> 
> > i think it's better to use a different bit
> > for aarch64 ifunc abi hacks (e.g. 1ULL << 62)
> 
> I agree.

OK, I'll respin and reserve bit 1ULL << 62.

Thanks,

Andrew Murray

> 
> p.
> 
>

Patch

diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 68b11aa..c77f015 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -374,7 +374,7 @@  static struct aead_alg ccm_aes_alg = {
 
 static int __init aes_mod_init(void)
 {
-	if (!(elf_hwcap & HWCAP_AES))
+	if (!cpu_have_feature_name(AES))
 		return -ENODEV;
 	return crypto_register_aead(&ccm_aes_alg);
 }
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index e7a95a5..7a133cd1 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -440,7 +440,7 @@  static int __init aes_init(void)
 	int err;
 	int i;
 
-	if (!(elf_hwcap & HWCAP_ASIMD))
+	if (!cpu_have_feature_name(ASIMD))
 		return -ENODEV;
 
 	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index bece1d8..71f12cd 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -173,7 +173,7 @@  static struct skcipher_alg algs[] = {
 
 static int __init chacha_simd_mod_init(void)
 {
-	if (!(elf_hwcap & HWCAP_ASIMD))
+	if (!cpu_have_feature_name(ASIMD))
 		return -ENODEV;
 
 	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index b461d62..56f1a7b 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -88,7 +88,7 @@  static struct shash_alg crc_t10dif_alg = {
 
 static int __init crc_t10dif_mod_init(void)
 {
-	if (elf_hwcap & HWCAP_PMULL)
+	if (cpu_have_feature_name(PMULL))
 		crc_t10dif_pmull = crc_t10dif_pmull_p64;
 	else
 		crc_t10dif_pmull = crc_t10dif_pmull_p8;
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 067d893..8426f8a 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -646,10 +646,10 @@  static int __init ghash_ce_mod_init(void)
 {
 	int ret;
 
-	if (!(elf_hwcap & HWCAP_ASIMD))
+	if (!cpu_have_feature_name(ASIMD))
 		return -ENODEV;
 
-	if (elf_hwcap & HWCAP_PMULL)
+	if (cpu_have_feature_name(PMULL))
 		pmull_ghash_update = pmull_ghash_update_p64;
 
 	else
@@ -659,7 +659,7 @@  static int __init ghash_ce_mod_init(void)
 	if (ret)
 		return ret;
 
-	if (elf_hwcap & HWCAP_PMULL) {
+	if (cpu_have_feature_name(PMULL)) {
 		ret = crypto_register_aead(&gcm_aes_alg);
 		if (ret)
 			crypto_unregister_shash(&ghash_alg);
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
index 22cc32a..9ce3368 100644
--- a/arch/arm64/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -56,7 +56,7 @@  static struct shash_alg nhpoly1305_alg = {
 
 static int __init nhpoly1305_mod_init(void)
 {
-	if (!(elf_hwcap & HWCAP_ASIMD))
+	if (!cpu_have_feature_name(ASIMD))
 		return -ENODEV;
 
 	return crypto_register_shash(&nhpoly1305_alg);
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index 4aedeae..20e28d0 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -173,7 +173,7 @@  static int __init sha256_mod_init(void)
 	if (ret)
 		return ret;
 
-	if (elf_hwcap & HWCAP_ASIMD) {
+	if (cpu_have_feature_name(ASIMD)) {
 		ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
 		if (ret)
 			crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
@@ -183,7 +183,7 @@  static int __init sha256_mod_init(void)
 
 static void __exit sha256_mod_fini(void)
 {
-	if (elf_hwcap & HWCAP_ASIMD)
+	if (cpu_have_feature_name(ASIMD))
 		crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
 	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 }
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index dfcfba7..dd21a32 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -17,12 +17,12 @@ 
 /*
  * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
  * in the kernel and for user space to keep track of which optional features
- * are supported by the current system. So let's map feature 'x' to HWCAP_x.
- * Note that HWCAP_x constants are bit fields so we need to take the log.
+ * are supported by the current system. So let's map feature 'x' to
+ * KERNEL_HWCAP_x.
  */
 
-#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
-#define cpu_feature(x)		ilog2(HWCAP_ ## x)
+#define MAX_CPU_FEATURES	64
+#define cpu_feature(x)		(KERNEL_HWCAP_ ## x)
 
 #ifndef __ASSEMBLY__
 
@@ -396,10 +396,19 @@  extern struct static_key_false arm64_const_caps_ready;
 
 bool this_cpu_has_cap(unsigned int cap);
 
+static inline void cpu_set_feature(unsigned int num)
+{
+	WARN_ON(num >= MAX_CPU_FEATURES);
+	elf_hwcap |= BIT(num);
+}
+#define cpu_set_feature_name(name) cpu_set_feature(cpu_feature(name))
+
 static inline bool cpu_have_feature(unsigned int num)
 {
-	return elf_hwcap & (1UL << num);
+	WARN_ON(num >= MAX_CPU_FEATURES);
+	return elf_hwcap & BIT(num);
 }
+#define cpu_have_feature_name(name) cpu_have_feature(cpu_feature(name))
 
 /* System capability check for constant caps */
 static inline bool __cpus_have_const_cap(int num)
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 400b80b..7549c72 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -39,12 +39,50 @@ 
 #define COMPAT_HWCAP2_SHA2	(1 << 3)
 #define COMPAT_HWCAP2_CRC32	(1 << 4)
 
+/*
+ * KERNEL_HWCAP flags - for elf_hwcap (in kernel)
+ */
+#define KERNEL_HWCAP_FP			ilog2(HWCAP_FP)
+#define KERNEL_HWCAP_ASIMD		ilog2(HWCAP_ASIMD)
+#define KERNEL_HWCAP_EVTSTRM		ilog2(HWCAP_EVTSTRM)
+#define KERNEL_HWCAP_AES		ilog2(HWCAP_AES)
+#define KERNEL_HWCAP_PMULL		ilog2(HWCAP_PMULL)
+#define KERNEL_HWCAP_SHA1		ilog2(HWCAP_SHA1)
+#define KERNEL_HWCAP_SHA2		ilog2(HWCAP_SHA2)
+#define KERNEL_HWCAP_CRC32		ilog2(HWCAP_CRC32)
+#define KERNEL_HWCAP_ATOMICS		ilog2(HWCAP_ATOMICS)
+#define KERNEL_HWCAP_FPHP		ilog2(HWCAP_FPHP)
+#define KERNEL_HWCAP_ASIMDHP		ilog2(HWCAP_ASIMDHP)
+#define KERNEL_HWCAP_CPUID		ilog2(HWCAP_CPUID)
+#define KERNEL_HWCAP_ASIMDRDM		ilog2(HWCAP_ASIMDRDM)
+#define KERNEL_HWCAP_JSCVT		ilog2(HWCAP_JSCVT)
+#define KERNEL_HWCAP_FCMA		ilog2(HWCAP_FCMA)
+#define KERNEL_HWCAP_LRCPC		ilog2(HWCAP_LRCPC)
+#define KERNEL_HWCAP_DCPOP		ilog2(HWCAP_DCPOP)
+#define KERNEL_HWCAP_SHA3		ilog2(HWCAP_SHA3)
+#define KERNEL_HWCAP_SM3		ilog2(HWCAP_SM3)
+#define KERNEL_HWCAP_SM4		ilog2(HWCAP_SM4)
+#define KERNEL_HWCAP_ASIMDDP		ilog2(HWCAP_ASIMDDP)
+#define KERNEL_HWCAP_SHA512		ilog2(HWCAP_SHA512)
+#define KERNEL_HWCAP_SVE		ilog2(HWCAP_SVE)
+#define KERNEL_HWCAP_ASIMDFHM		ilog2(HWCAP_ASIMDFHM)
+#define KERNEL_HWCAP_DIT		ilog2(HWCAP_DIT)
+#define KERNEL_HWCAP_USCAT		ilog2(HWCAP_USCAT)
+#define KERNEL_HWCAP_ILRCPC		ilog2(HWCAP_ILRCPC)
+#define KERNEL_HWCAP_FLAGM		ilog2(HWCAP_FLAGM)
+#define KERNEL_HWCAP_SSBS		ilog2(HWCAP_SSBS)
+#define KERNEL_HWCAP_SB			ilog2(HWCAP_SB)
+#define KERNEL_HWCAP_PACA		ilog2(HWCAP_PACA)
+#define KERNEL_HWCAP_PACG		ilog2(HWCAP_PACG)
+#define KERNEL_HWCAP_DCPODP		(ilog2(HWCAP2_DCPODP) + 32)
+
 #ifndef __ASSEMBLY__
 /*
  * This yields a mask that user programs can use to figure out what
  * instruction set this cpu supports.
  */
-#define ELF_HWCAP		(elf_hwcap)
+#define ELF_HWCAP		lower_32_bits(elf_hwcap)
+#define ELF_HWCAP2		upper_32_bits(elf_hwcap)
 
 #ifdef CONFIG_COMPAT
 #define COMPAT_ELF_HWCAP	(compat_elf_hwcap)
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 5f0750c..453b45a 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -18,7 +18,7 @@ 
 #define _UAPI__ASM_HWCAP_H
 
 /*
- * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
+ * HWCAP flags - for AT_HWCAP
  */
 #define HWCAP_FP		(1 << 0)
 #define HWCAP_ASIMD		(1 << 1)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f6d84e2..6a477a3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1536,39 +1536,39 @@  static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
 #endif
 
 static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC),
-	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SB),
-	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
 #endif
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
 #ifdef CONFIG_ARM64_PTR_AUTH
-	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, HWCAP_PACA),
-	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, HWCAP_PACG),
+	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
+	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
 #endif
 	{},
 };
@@ -1588,7 +1588,7 @@  static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 {
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
-		elf_hwcap |= cap->hwcap;
+		cpu_set_feature(cap->hwcap);
 		break;
 #ifdef CONFIG_COMPAT
 	case CAP_COMPAT_HWCAP:
@@ -1611,7 +1611,7 @@  static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
-		rc = (elf_hwcap & cap->hwcap) != 0;
+		rc = cpu_have_feature(cap->hwcap);
 		break;
 #ifdef CONFIG_COMPAT
 	case CAP_COMPAT_HWCAP:
@@ -1632,7 +1632,7 @@  static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
 {
 	/* We support emulation of accesses to CPU ID feature registers */
-	elf_hwcap |= HWCAP_CPUID;
+	cpu_set_feature_name(CPUID);
 	for (; hwcaps->matches; hwcaps++)
 		if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps)))
 			cap_set_elf_hwcap(hwcaps);
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index ca0685f..810db95 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -167,7 +167,7 @@  static int c_show(struct seq_file *m, void *v)
 #endif /* CONFIG_COMPAT */
 		} else {
 			for (j = 0; hwcap_str[j]; j++)
-				if (elf_hwcap & (1 << j))
+				if (cpu_have_feature(j))
 					seq_printf(m, " %s", hwcap_str[j]);
 		}
 		seq_puts(m, "\n");
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5ebe73b..940b547 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1258,14 +1258,14 @@  static inline void fpsimd_hotplug_init(void) { }
  */
 static int __init fpsimd_init(void)
 {
-	if (elf_hwcap & HWCAP_FP) {
+	if (cpu_have_feature_name(FP)) {
 		fpsimd_pm_init();
 		fpsimd_hotplug_init();
 	} else {
 		pr_notice("Floating-point is not implemented\n");
 	}
 
-	if (!(elf_hwcap & HWCAP_ASIMD))
+	if (!cpu_have_feature_name(ASIMD))
 		pr_notice("Advanced SIMD is not implemented\n");
 
 	return sve_sysctl_init();
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 9a7d4dc..4e8d3b4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -778,7 +778,11 @@  static void arch_timer_evtstrm_enable(int divider)
 	cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
 			| ARCH_TIMER_VIRT_EVT_EN;
 	arch_timer_set_cntkctl(cntkctl);
+#ifdef CONFIG_ARM64
+	cpu_set_feature_name(EVTSTRM);
+#else
 	elf_hwcap |= HWCAP_EVTSTRM;
+#endif
 #ifdef CONFIG_COMPAT
 	compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
 #endif
@@ -1000,7 +1004,11 @@  static int arch_timer_cpu_pm_notify(struct notifier_block *self,
 	} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
 		arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
 
+#ifdef CONFIG_ARM64
+		if (cpu_have_feature_name(EVTSTRM))
+#else
 		if (elf_hwcap & HWCAP_EVTSTRM)
+#endif
 			cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
 	}
 	return NOTIFY_OK;