diff mbox series

[2/6] RISC-V: Enable cbo.zero in usermode

Message ID 20230809115516.214537-10-ajones@ventanamicro.com (mailing list archive)
State Superseded
Commit 090925374690a40b3ac96348874333f331184f4c
Headers show
Series RISC-V: Enable cbo.zero in usermode | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 174e8ac0272d
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 4 and now 4
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig success Errors and warnings before: 2810 this patch: 2810
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig fail Errors and warnings before: 15877 this patch: 15878
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 12 this patch: 12
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: Consider using #include <linux/cpufeature.h> instead of <asm/cpufeature.h>
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Andrew Jones Aug. 9, 2023, 11:55 a.m. UTC
When Zicboz is present, enable its instruction (cbo.zero) in
usermode by setting its respective senvcfg bit. We don't bother
trying to set this bit per-task, which would also require an
interface for tasks to request enabling and/or disabling. Instead,
permanently set the bit for each hart which has the extension when
bringing it online.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/include/asm/cpufeature.h |  2 ++
 arch/riscv/include/asm/csr.h        |  1 +
 arch/riscv/include/asm/hwcap.h      | 16 ++++++++++++++++
 arch/riscv/kernel/cpufeature.c      |  6 ++++++
 arch/riscv/kernel/setup.c           |  4 ++++
 arch/riscv/kernel/smpboot.c         |  4 ++++
 6 files changed, 33 insertions(+)

Comments

Evan Green Aug. 9, 2023, 4 p.m. UTC | #1
On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> When Zicboz is present, enable its instruction (cbo.zero) in
> usermode by setting its respective senvcfg bit. We don't bother
> trying to set this bit per-task, which would also require an
> interface for tasks to request enabling and/or disabling. Instead,
> permanently set the bit for each hart which has the extension when
> bringing it online.
>
> Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/include/asm/cpufeature.h |  2 ++
>  arch/riscv/include/asm/csr.h        |  1 +
>  arch/riscv/include/asm/hwcap.h      | 16 ++++++++++++++++
>  arch/riscv/kernel/cpufeature.c      |  6 ++++++
>  arch/riscv/kernel/setup.c           |  4 ++++
>  arch/riscv/kernel/smpboot.c         |  4 ++++
>  6 files changed, 33 insertions(+)
>
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index 23fed53b8815..788fd575c21a 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -30,4 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed);
>  /* Per-cpu ISA extensions. */
>  extern struct riscv_isainfo hart_isa[NR_CPUS];
>
> +void riscv_user_isa_enable(void);
> +
>  #endif
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index 7bac43a3176e..e187e76e3df4 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -273,6 +273,7 @@
>  #define CSR_SIE                        0x104
>  #define CSR_STVEC              0x105
>  #define CSR_SCOUNTEREN         0x106
> +#define CSR_SENVCFG            0x10a
>  #define CSR_SSCRATCH           0x140
>  #define CSR_SEPC               0x141
>  #define CSR_SCAUSE             0x142
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index f041bfa7f6a0..4929faecb75f 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -66,6 +66,7 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/jump_label.h>
> +#include <asm/cpufeature.h>
>
>  unsigned long riscv_get_elf_hwcap(void);
>
> @@ -130,6 +131,21 @@ riscv_has_extension_unlikely(const unsigned long ext)
>         return true;
>  }
>
> +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> +{
> +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> +               return true;
> +
> +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> +}
> +
> +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> +{
> +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> +               return true;
> +
> +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> +}

Another way to do this would be to add a parameter to
riscv_has_extension_*() (as there are very few users), then these new
functions can turn around and call those with the new parameter set to
hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
only advantage to it I can argue is it keeps the code flows more
unified.

-Evan
Andrew Jones Aug. 9, 2023, 4:58 p.m. UTC | #2
On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote:
> On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
...
> > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> > +{
> > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > +               return true;
> > +
> > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > +}
> > +
> > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> > +{
> > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > +               return true;
> > +
> > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > +}
> 
> Another way to do this would be to add a parameter to
> riscv_has_extension_*() (as there are very few users), then these new
> functions can turn around and call those with the new parameter set to
> hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
> only advantage to it I can argue is it keeps the code flows more
> unified.
>

I like unification, but I think I'd prefer we create wrappers and
try to avoid callers needing to construct hart_isa[].isa parameters
themselves. I'm also not a big fan of the NULL parameter needed when
riscv_isa_extension_available() is invoked for the riscv_isa bitmap.
So we need:

  1. check if an extension is in riscv_isa
  2. check if an extension is in a bitmap provided by the caller
  3. check if an extension is in this cpu's isa bitmap
  4. check if an extension is in the isa bitmap of a cpu provided by the
     caller

The only one we can optimize with alternatives is (1), so it definitely
gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can
also get wrappers which first try the optimized (1), like I have above.
Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers
for (4)

 static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
 {
     if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
         return true;

     return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }

 static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
 {
     if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
         return true;

     return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }

and then use smp_processor_id() directly in the callers that need
to check this_cpu's extensions.

For case (2), I'd advocate we rename __riscv_isa_extension_available() to
riscv_has_extension() and drop the riscv_isa_extension_available() macro
in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and
others that rely on the pasting. And, ideally, we'd convert most
riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely().

Thanks,
drew
Conor Dooley Aug. 9, 2023, 6:12 p.m. UTC | #3
On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote:
> On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote:
> > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> ...
> > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> > > +{
> > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > > +               return true;
> > > +
> > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > +}
> > > +
> > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> > > +{
> > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > > +               return true;
> > > +
> > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > +}
> > 
> > Another way to do this would be to add a parameter to
> > riscv_has_extension_*() (as there are very few users), then these new
> > functions can turn around and call those with the new parameter set to
> > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
> > only advantage to it I can argue is it keeps the code flows more
> > unified.
> >
> 
> I like unification, but I think I'd prefer we create wrappers and
> try to avoid callers needing to construct hart_isa[].isa parameters
> themselves. I'm also not a big fan of the NULL parameter needed when
> riscv_isa_extension_available() is invoked for the riscv_isa bitmap.
> So we need:
> 
>   1. check if an extension is in riscv_isa
>   2. check if an extension is in a bitmap provided by the caller
>   3. check if an extension is in this cpu's isa bitmap
>   4. check if an extension is in the isa bitmap of a cpu provided by the
>      caller
> 
> The only one we can optimize with alternatives is (1), so it definitely
> gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can
> also get wrappers which first try the optimized (1), like I have above.
> Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers
> for (4)
> 
>  static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
>  {
>      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
>          return true;
> 
>      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
>  }
> 
>  static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
>  {
>      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))

Why are you gating on CONFIG_RISCV_ALTERNATIVE here?

>          return true;
> 
>      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
>  }
> 
> and then use smp_processor_id() directly in the callers that need
> to check this_cpu's extensions.
> 
> For case (2), I'd advocate we rename __riscv_isa_extension_available() to
> riscv_has_extension() and drop the riscv_isa_extension_available() macro
> in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and
> others that rely on the pasting.

> And, ideally, we'd convert most
> riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely().

> I'm also not a big fan of the NULL parameter needed when
> riscv_isa_extension_available() is invoked for the riscv_isa bitmap

Rather than actually act on my concerns about
__riscv_isa_extension_available(), I've been busy devoting my spare
time to playing MMOs with the excuse of not wanting to fiddle further
with cpufeature.c et al until Palmer merged the new DT property stuff,
but splitting out your case 1 above seems like it would really help
there. The NULL argument case is the one I think has the potential to
be a footgun in the face of config options.
Split out we can document that purpose of each function & hopefully
have one set of functions that deals with "this extension was detected
to be present in the hardware" and one that does "this extension was
detected & supported by this particular kernel".

I'll try to take a proper look at this series tomorrow :)

Cheers,
Conor.
Evan Green Aug. 9, 2023, 7:40 p.m. UTC | #4
On Wed, Aug 9, 2023 at 9:58 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote:
> > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> ...
> > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> > > +{
> > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > > +               return true;
> > > +
> > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > +}
> > > +
> > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> > > +{
> > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > > +               return true;
> > > +
> > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > +}
> >
> > Another way to do this would be to add a parameter to
> > riscv_has_extension_*() (as there are very few users), then these new
> > functions can turn around and call those with the new parameter set to
> > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
> > only advantage to it I can argue is it keeps the code flows more
> > unified.
> >
>
> I like unification, but I think I'd prefer we create wrappers and
> try to avoid callers needing to construct hart_isa[].isa parameters
> themselves. I'm also not a big fan of the NULL parameter needed when
> riscv_isa_extension_available() is invoked for the riscv_isa bitmap.
> So we need:
>
>   1. check if an extension is in riscv_isa
>   2. check if an extension is in a bitmap provided by the caller
>   3. check if an extension is in this cpu's isa bitmap
>   4. check if an extension is in the isa bitmap of a cpu provided by the
>      caller
>
> The only one we can optimize with alternatives is (1), so it definitely
> gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can
> also get wrappers which first try the optimized (1), like I have above.
> Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers
> for (4)
>
>  static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
>  {
>      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
>          return true;
>
>      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
>  }
>
>  static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
>  {
>      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
>          return true;
>
>      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
>  }
>
> and then use smp_processor_id() directly in the callers that need
> to check this_cpu's extensions.
>
> For case (2), I'd advocate we rename __riscv_isa_extension_available() to
> riscv_has_extension() and drop the riscv_isa_extension_available() macro
> in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and
> others that rely on the pasting. And, ideally, we'd convert most
> riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely().

Sounds ok to me!
-Evan
Andrew Jones Aug. 10, 2023, 7:31 a.m. UTC | #5
On Wed, Aug 09, 2023 at 07:12:58PM +0100, Conor Dooley wrote:
> On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote:
> > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote:
> > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > ...
> > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> > > > +{
> > > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > > > +               return true;
> > > > +
> > > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > > +}
> > > > +
> > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> > > > +{
> > > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > > > +               return true;
> > > > +
> > > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > > +}
> > > 
> > > Another way to do this would be to add a parameter to
> > > riscv_has_extension_*() (as there are very few users), then these new
> > > functions can turn around and call those with the new parameter set to
> > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
> > > only advantage to it I can argue is it keeps the code flows more
> > > unified.
> > >
> > 
> > I like unification, but I think I'd prefer we create wrappers and
> > try to avoid callers needing to construct hart_isa[].isa parameters
> > themselves. I'm also not a big fan of the NULL parameter needed when
> > riscv_isa_extension_available() is invoked for the riscv_isa bitmap.
> > So we need:
> > 
> >   1. check if an extension is in riscv_isa
> >   2. check if an extension is in a bitmap provided by the caller
> >   3. check if an extension is in this cpu's isa bitmap
> >   4. check if an extension is in the isa bitmap of a cpu provided by the
> >      caller
> > 
> > The only one we can optimize with alternatives is (1), so it definitely
> > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can
> > also get wrappers which first try the optimized (1), like I have above.
> > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers
> > for (4)
> > 
> >  static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
> >  {
> >      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> >          return true;
> > 
> >      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> >  }
> > 
> >  static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
> >  {
> >      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> 
> Why are you gating on CONFIG_RISCV_ALTERNATIVE here?

This ensures we remove the riscv_has_extension_[un]likely() call
when that call would end up using its
__riscv_isa_extension_available(NULL, ext) fallback. If that fallback
where to return false, then we'd still need to make the
__riscv_isa_extension_available(hart_isa[cpu].isa, ext) call, doubling
the cost. Whereas, when we gate on CONFIG_RISCV_ALTERNATIVE, we know that
riscv_has_extension_[un]likely() will use an alternative to check the
global set of extensions. When the extension is there, the compiler
ensures that everything vanishes. When it's not, we'll fallback to a
single search of the cpu's isa bitmap.

> 
> >          return true;
> > 
> >      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> >  }
> > 
> > and then use smp_processor_id() directly in the callers that need
> > to check this_cpu's extensions.
> > 
> > For case (2), I'd advocate we rename __riscv_isa_extension_available() to
> > riscv_has_extension() and drop the riscv_isa_extension_available() macro
> > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and
> > others that rely on the pasting.
> 
> > And, ideally, we'd convert most
> > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely().
> 
> > I'm also not a big fan of the NULL parameter needed when
> > riscv_isa_extension_available() is invoked for the riscv_isa bitmap
> 
> Rather than actually act on my concerns about
> __riscv_isa_extension_available(), I've been busy devoting my spare
> time to playing MMOs with the excuse of not wanting to fiddle further
> with cpufeature.c et al until Palmer merged the new DT property stuff,
> but splitting out your case 1 above seems like it would really help
> there. The NULL argument case is the one I think has the potential to
> be a footgun in the face of config options.
> Split out we can document that purpose of each function & hopefully
> have one set of functions that deals with "this extension was detected
> to be present in the hardware" and one that does "this extension was
> detected & supported by this particular kernel".

Sounds good to me!

> 
> I'll try to take a proper look at this series tomorrow :)
>

Thanks!
drew
Conor Dooley Aug. 10, 2023, 9:34 a.m. UTC | #6
On Thu, Aug 10, 2023 at 09:31:54AM +0200, Andrew Jones wrote:
> On Wed, Aug 09, 2023 at 07:12:58PM +0100, Conor Dooley wrote:
> > On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote:
> > > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote:
> > > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > > ...
> > > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> > > > > +{
> > > > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > > > > +               return true;
> > > > > +
> > > > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > > > +}
> > > > > +
> > > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> > > > > +{
> > > > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > > > > +               return true;
> > > > > +
> > > > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > > > +}
> > > > 
> > > > Another way to do this would be to add a parameter to
> > > > riscv_has_extension_*() (as there are very few users), then these new
> > > > functions can turn around and call those with the new parameter set to
> > > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
> > > > only advantage to it I can argue is it keeps the code flows more
> > > > unified.
> > > >
> > > 
> > > I like unification, but I think I'd prefer we create wrappers and
> > > try to avoid callers needing to construct hart_isa[].isa parameters
> > > themselves. I'm also not a big fan of the NULL parameter needed when
> > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap.
> > > So we need:
> > > 
> > >   1. check if an extension is in riscv_isa
> > >   2. check if an extension is in a bitmap provided by the caller
> > >   3. check if an extension is in this cpu's isa bitmap
> > >   4. check if an extension is in the isa bitmap of a cpu provided by the
> > >      caller
> > > 
> > > The only one we can optimize with alternatives is (1), so it definitely
> > > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can
> > > also get wrappers which first try the optimized (1), like I have above.
> > > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers
> > > for (4)
> > > 
> > >  static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
> > >  {
> > >      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > >          return true;
> > > 
> > >      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > >  }
> > > 
> > >  static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
> > >  {
> > >      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > 
> > Why are you gating on CONFIG_RISCV_ALTERNATIVE here?
> 
> This ensures we remove the riscv_has_extension_[un]likely() call
> when that call would end up using its
> __riscv_isa_extension_available(NULL, ext) fallback. If that fallback
> where to return false, then we'd still need to make the
> __riscv_isa_extension_available(hart_isa[cpu].isa, ext) call, doubling
> the cost. Whereas, when we gate on CONFIG_RISCV_ALTERNATIVE, we know that
> riscv_has_extension_[un]likely() will use an alternative to check the
> global set of extensions. When the extension is there, the compiler
> ensures that everything vanishes. When it's not, we'll fallback to a
> single search of the cpu's isa bitmap.

Right, that is what I suspected that you were trying to accomplish here.
I was not just not entirely sure whether it was or you'd just missed the
fallback path. In my original mail I was just going to say "Please add a
comment here as to why you want to avoid the fallback", but figured I
should figure out your intent first!

Just to note, alternatives are available on all !XIP kernels now, so
it's only in the case that the fallback path will be activated.

> > >          return true;
> > > 
> > >      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > >  }
> > > 
> > > and then use smp_processor_id() directly in the callers that need
> > > to check this_cpu's extensions.
> > > 
> > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to
> > > riscv_has_extension() and drop the riscv_isa_extension_available() macro
> > > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and
> > > others that rely on the pasting.
> > 
> > > And, ideally, we'd convert most
> > > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely().
> > 
> > > I'm also not a big fan of the NULL parameter needed when
> > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap
> > 
> > Rather than actually act on my concerns about
> > __riscv_isa_extension_available(), I've been busy devoting my spare
> > time to playing MMOs with the excuse of not wanting to fiddle further
> > with cpufeature.c et al until Palmer merged the new DT property stuff,
> > but splitting out your case 1 above seems like it would really help
> > there. The NULL argument case is the one I think has the potential to
> > be a footgun in the face of config options.
> > Split out we can document that purpose of each function & hopefully
> > have one set of functions that deals with "this extension was detected
> > to be present in the hardware" and one that does "this extension was
> > detected & supported by this particular kernel".
> 
> Sounds good to me!

I figure said change should be independent of what's going on in this
series?
Andrew Jones Aug. 10, 2023, 10:54 a.m. UTC | #7
On Thu, Aug 10, 2023 at 10:34:33AM +0100, Conor Dooley wrote:
> On Thu, Aug 10, 2023 at 09:31:54AM +0200, Andrew Jones wrote:
> > On Wed, Aug 09, 2023 at 07:12:58PM +0100, Conor Dooley wrote:
> > > On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote:
> > > > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote:
> > > > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > > > ...
> > > > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
> > > > > > +{
> > > > > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > > > > > +               return true;
> > > > > > +
> > > > > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > > > > +}
> > > > > > +
> > > > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
> > > > > > +{
> > > > > > +       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > > > > > +               return true;
> > > > > > +
> > > > > > +       return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
> > > > > > +}
> > > > > 
> > > > > Another way to do this would be to add a parameter to
> > > > > riscv_has_extension_*() (as there are very few users), then these new
> > > > > functions can turn around and call those with the new parameter set to
> > > > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The
> > > > > only advantage to it I can argue is it keeps the code flows more
> > > > > unified.
> > > > >
> > > > 
> > > > I like unification, but I think I'd prefer we create wrappers and
> > > > try to avoid callers needing to construct hart_isa[].isa parameters
> > > > themselves. I'm also not a big fan of the NULL parameter needed when
> > > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap.
> > > > So we need:
> > > > 
> > > >   1. check if an extension is in riscv_isa
> > > >   2. check if an extension is in a bitmap provided by the caller
> > > >   3. check if an extension is in this cpu's isa bitmap
> > > >   4. check if an extension is in the isa bitmap of a cpu provided by the
> > > >      caller
> > > > 
> > > > The only one we can optimize with alternatives is (1), so it definitely
> > > > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can
> > > > also get wrappers which first try the optimized (1), like I have above.
> > > > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers
> > > > for (4)
> > > > 
> > > >  static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
> > > >  {
> > > >      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
> > > >          return true;
> > > > 
> > > >      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > > >  }
> > > > 
> > > >  static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
> > > >  {
> > > >      if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
> > > 
> > > Why are you gating on CONFIG_RISCV_ALTERNATIVE here?
> > 
> > This ensures we remove the riscv_has_extension_[un]likely() call
> > when that call would end up using its
> > __riscv_isa_extension_available(NULL, ext) fallback. If that fallback
> > where to return false, then we'd still need to make the
> > __riscv_isa_extension_available(hart_isa[cpu].isa, ext) call, doubling
> > the cost. Whereas, when we gate on CONFIG_RISCV_ALTERNATIVE, we know that
> > riscv_has_extension_[un]likely() will use an alternative to check the
> > global set of extensions. When the extension is there, the compiler
> > ensures that everything vanishes. When it's not, we'll fallback to a
> > single search of the cpu's isa bitmap.
> 
> Right, that is what I suspected that you were trying to accomplish here.
> I was not just not entirely sure whether it was or you'd just missed the
> fallback path. In my original mail I was just going to say "Please add a
> comment here as to why you want to avoid the fallback", but figured I
> should figure out your intent first!

Thanks, I'll add a comment for v2.

> 
> Just to note, alternatives are available on all !XIP kernels now, so
> it's only in the case that the fallback path will be activated.
> 
> > > >          return true;
> > > > 
> > > >      return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
> > > >  }
> > > > 
> > > > and then use smp_processor_id() directly in the callers that need
> > > > to check this_cpu's extensions.
> > > > 
> > > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to
> > > > riscv_has_extension() and drop the riscv_isa_extension_available() macro
> > > > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and
> > > > others that rely on the pasting.
> > > 
> > > > And, ideally, we'd convert most
> > > > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely().
> > > 
> > > > I'm also not a big fan of the NULL parameter needed when
> > > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap
> > > 
> > > Rather than actually act on my concerns about
> > > __riscv_isa_extension_available(), I've been busy devoting my spare
> > > time to playing MMOs with the excuse of not wanting to fiddle further
> > > with cpufeature.c et al until Palmer merged the new DT property stuff,
> > > but splitting out your case 1 above seems like it would really help
> > > there. The NULL argument case is the one I think has the potential to
> > > be a footgun in the face of config options.
> > > Split out we can document that purpose of each function & hopefully
> > > have one set of functions that deals with "this extension was detected
> > > to be present in the hardware" and one that does "this extension was
> > > detected & supported by this particular kernel".
> > 
> > Sounds good to me!
> 
> I figure said change should be independent of what's going on in this
> series?

Yeah, I considered doing it as part of this series, but then decided to
only start down the path with two new wrappers, which, for v2, I'll
change to the cpu parameter taking variant. The rest of the rework
would probably be best to do as a separate series, which I can start
soon.

Thanks,
drew
Conor Dooley Aug. 10, 2023, 1:23 p.m. UTC | #8
On Thu, Aug 10, 2023 at 12:54:56PM +0200, Andrew Jones wrote:
> On Thu, Aug 10, 2023 at 10:34:33AM +0100, Conor Dooley wrote:


> > I figure said change should be independent of what's going on in this
> > series?
> 
> Yeah, I considered doing it as part of this series, but then decided to
> only start down the path with two new wrappers, which, for v2, I'll
> change to the cpu parameter taking variant. The rest of the rework
> would probably be best to do as a separate series, which I can start
> soon.

If you're offering, who am I to refuse!
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 23fed53b8815..788fd575c21a 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -30,4 +30,6 @@  DECLARE_PER_CPU(long, misaligned_access_speed);
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
 
+void riscv_user_isa_enable(void);
+
 #endif
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 7bac43a3176e..e187e76e3df4 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -273,6 +273,7 @@ 
 #define CSR_SIE			0x104
 #define CSR_STVEC		0x105
 #define CSR_SCOUNTEREN		0x106
+#define CSR_SENVCFG		0x10a
 #define CSR_SSCRATCH		0x140
 #define CSR_SEPC		0x141
 #define CSR_SCAUSE		0x142
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index f041bfa7f6a0..4929faecb75f 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -66,6 +66,7 @@ 
 #ifndef __ASSEMBLY__
 
 #include <linux/jump_label.h>
+#include <asm/cpufeature.h>
 
 unsigned long riscv_get_elf_hwcap(void);
 
@@ -130,6 +131,21 @@  riscv_has_extension_unlikely(const unsigned long ext)
 	return true;
 }
 
+static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext)
+{
+	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
+		return true;
+
+	return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
+}
+
+static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext)
+{
+	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
+		return true;
+
+	return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext);
+}
 #endif
 
 #endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 31843e9cc80c..fc0bf300acc7 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -391,6 +391,12 @@  unsigned long riscv_get_elf_hwcap(void)
 	return hwcap;
 }
 
+void riscv_user_isa_enable(void)
+{
+	if (riscv_this_cpu_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ))
+		csr_set(CSR_SENVCFG, ENVCFG_CBZE);
+}
+
 #ifdef CONFIG_RISCV_ALTERNATIVE
 /*
  * Alternative patch sites consider 48 bits when determining when to patch
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 971fe776e2f8..2f053f0763a1 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -25,6 +25,7 @@ 
 #include <asm/acpi.h>
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/early_ioremap.h>
 #include <asm/pgtable.h>
@@ -308,9 +309,12 @@  void __init setup_arch(char **cmdline_p)
 	riscv_fill_hwcap();
 	init_rt_signal_env();
 	apply_boot_alternatives();
+
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
 	    riscv_isa_extension_available(NULL, ZICBOM))
 		riscv_noncoherent_supported();
+
+	riscv_user_isa_enable();
 }
 
 static int __init topology_init(void)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index f4d6acb38dd0..502b04abda0b 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -25,6 +25,8 @@ 
 #include <linux/of.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/mm.h>
+
+#include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
@@ -252,6 +254,8 @@  asmlinkage __visible void smp_callin(void)
 			elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
 	}
 
+	riscv_user_isa_enable();
+
 	/*
 	 * Remote TLB flushes are ignored while the CPU is offline, so emit
 	 * a local TLB flush right now just in case.