Message ID | 20210512225545.6c23d51f@xhacker (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] riscv: Turn has_fpu into a static key if FPU=y | expand |
On Wed, 12 May 2021 07:55:45 PDT (-0700), jszhang3@mail.ustc.edu.cn wrote:
> From: Jisheng Zhang <jszhang@kernel.org>
>
> The has_fpu check sits at hot code path: switch_to(). Currently, has_fpu
> is a bool variable if FPU=y, switch_to() checks it each time, we can
> optimize out this check by turning the has_fpu into a static key.
>
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
> Since v1:
>  - use static_branch_likely()

Sorry, I missed this one. I was actually considering pointing that out,
but I wasn't sure likely was the way to go -- the old code had the save
code out of line, and the unlikely one did too. That said, I'm probably
just over-thinking it: most machines have FPUs, so having the branch as
likely seems reasonable. I've taken this one instead.

>  - Improve the commit msg
>
>  arch/riscv/include/asm/switch_to.h | 11 ++++++++---
>  arch/riscv/kernel/cpufeature.c     |  4 ++--
>  arch/riscv/kernel/process.c        |  2 +-
>  arch/riscv/kernel/signal.c         |  4 ++--
>  4 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
> index 407bcc96a710..0a3f4f95c555 100644
> --- a/arch/riscv/include/asm/switch_to.h
> +++ b/arch/riscv/include/asm/switch_to.h
> @@ -6,6 +6,7 @@
>  #ifndef _ASM_RISCV_SWITCH_TO_H
>  #define _ASM_RISCV_SWITCH_TO_H
>
> +#include <linux/jump_label.h>
>  #include <linux/sched/task_stack.h>
>  #include <asm/processor.h>
>  #include <asm/ptrace.h>
> @@ -55,9 +56,13 @@ static inline void __switch_to_aux(struct task_struct *prev,
>  	fstate_restore(next, task_pt_regs(next));
>  }
>
> -extern bool has_fpu;
> +extern struct static_key_false cpu_hwcap_fpu;
> +static __always_inline bool has_fpu(void)
> +{
> +	return static_branch_likely(&cpu_hwcap_fpu);
> +}
>  #else
> -#define has_fpu false
> +static __always_inline bool has_fpu(void) { return false; }
>  #define fstate_save(task, regs) do { } while (0)
>  #define fstate_restore(task, regs) do { } while (0)
>  #define __switch_to_aux(__prev, __next) do { } while (0)
> @@ -70,7 +75,7 @@ extern struct task_struct *__switch_to(struct task_struct *,
>  do { \
>  	struct task_struct *__prev = (prev); \
>  	struct task_struct *__next = (next); \
> -	if (has_fpu) \
> +	if (has_fpu()) \
>  		__switch_to_aux(__prev, __next); \
>  	((last) = __switch_to(__prev, __next)); \
>  } while (0)
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index ac202f44a670..a2848dc36927 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -19,7 +19,7 @@ unsigned long elf_hwcap __read_mostly;
>  static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
>
>  #ifdef CONFIG_FPU
> -bool has_fpu __read_mostly;
> +__ro_after_init DEFINE_STATIC_KEY_FALSE(cpu_hwcap_fpu);
>  #endif
>
>  /**
> @@ -146,6 +146,6 @@ void riscv_fill_hwcap(void)
>
>  #ifdef CONFIG_FPU
>  	if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
> -		has_fpu = true;
> +		static_branch_enable(&cpu_hwcap_fpu);
>  #endif
>  }
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index f9cd57c9c67d..03ac3aa611f5 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -87,7 +87,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
>  	unsigned long sp)
>  {
>  	regs->status = SR_PIE;
> -	if (has_fpu) {
> +	if (has_fpu()) {
>  		regs->status |= SR_FS_INITIAL;
>  		/*
>  		 * Restore the initial value to the FP register
> diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
> index 65942b3748b4..c2d5ecbe5526 100644
> --- a/arch/riscv/kernel/signal.c
> +++ b/arch/riscv/kernel/signal.c
> @@ -90,7 +90,7 @@ static long restore_sigcontext(struct pt_regs *regs,
>  	/* sc_regs is structured the same as the start of pt_regs */
>  	err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
>  	/* Restore the floating-point state. */
> -	if (has_fpu)
> +	if (has_fpu())
>  		err |= restore_fp_state(regs, &sc->sc_fpregs);
>  	return err;
>  }
> @@ -143,7 +143,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
>  	/* sc_regs is structured the same as the start of pt_regs */
>  	err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
>  	/* Save the floating-point state. */
> -	if (has_fpu)
> +	if (has_fpu())
>  		err |= save_fp_state(regs, &sc->sc_fpregs);
>  	return err;
>  }
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 407bcc96a710..0a3f4f95c555 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -6,6 +6,7 @@
 #ifndef _ASM_RISCV_SWITCH_TO_H
 #define _ASM_RISCV_SWITCH_TO_H

+#include <linux/jump_label.h>
 #include <linux/sched/task_stack.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
@@ -55,9 +56,13 @@ static inline void __switch_to_aux(struct task_struct *prev,
 	fstate_restore(next, task_pt_regs(next));
 }

-extern bool has_fpu;
+extern struct static_key_false cpu_hwcap_fpu;
+static __always_inline bool has_fpu(void)
+{
+	return static_branch_likely(&cpu_hwcap_fpu);
+}
 #else
-#define has_fpu false
+static __always_inline bool has_fpu(void) { return false; }
 #define fstate_save(task, regs) do { } while (0)
 #define fstate_restore(task, regs) do { } while (0)
 #define __switch_to_aux(__prev, __next) do { } while (0)
@@ -70,7 +75,7 @@ extern struct task_struct *__switch_to(struct task_struct *,
 do { \
 	struct task_struct *__prev = (prev); \
 	struct task_struct *__next = (next); \
-	if (has_fpu) \
+	if (has_fpu()) \
 		__switch_to_aux(__prev, __next); \
 	((last) = __switch_to(__prev, __next)); \
 } while (0)
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ac202f44a670..a2848dc36927 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -19,7 +19,7 @@ unsigned long elf_hwcap __read_mostly;
 static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;

 #ifdef CONFIG_FPU
-bool has_fpu __read_mostly;
+__ro_after_init DEFINE_STATIC_KEY_FALSE(cpu_hwcap_fpu);
 #endif

 /**
@@ -146,6 +146,6 @@ void riscv_fill_hwcap(void)

 #ifdef CONFIG_FPU
 	if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
-		has_fpu = true;
+		static_branch_enable(&cpu_hwcap_fpu);
 #endif
 }
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index f9cd57c9c67d..03ac3aa611f5 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -87,7 +87,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
 	unsigned long sp)
 {
 	regs->status = SR_PIE;
-	if (has_fpu) {
+	if (has_fpu()) {
 		regs->status |= SR_FS_INITIAL;
 		/*
 		 * Restore the initial value to the FP register
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 65942b3748b4..c2d5ecbe5526 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -90,7 +90,7 @@ static long restore_sigcontext(struct pt_regs *regs,
 	/* sc_regs is structured the same as the start of pt_regs */
 	err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs));
 	/* Restore the floating-point state. */
-	if (has_fpu)
+	if (has_fpu())
 		err |= restore_fp_state(regs, &sc->sc_fpregs);
 	return err;
 }
@@ -143,7 +143,7 @@ static long setup_sigcontext(struct rt_sigframe __user *frame,
 	/* sc_regs is structured the same as the start of pt_regs */
 	err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs));
 	/* Save the floating-point state. */
-	if (has_fpu)
+	if (has_fpu())
 		err |= save_fp_state(regs, &sc->sc_fpregs);
 	return err;
 }