diff mbox series

[V2,2/3] riscv: Add ARCH_HAS_PREFETCHW support with Zicbop

Message ID 20231231082955.16516-3-guoren@kernel.org (mailing list archive)
State Changes Requested, archived
Headers show
Series riscv: Add Zicbop & prefetchw support | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Guo Ren Dec. 31, 2023, 8:29 a.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

Enable the Linux prefetchw primitive with the Zicbop cpufeature, which preloads
a cache line into the L1 cache for the next write operation.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

Comments

Guo Ren Jan. 1, 2024, 2:29 a.m. UTC | #1
On Sun, Dec 31, 2023 at 4:30 PM <guoren@kernel.org> wrote:
>
> From: Guo Ren <guoren@linux.alibaba.com>
>
> Enable Linux prefetchw primitive with Zibop cpufeature, which preloads
> cache line into L1 cache for the next write operation.
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
>
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index f19f861cda54..8d3a2ab37678 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -13,6 +13,9 @@
>  #include <vdso/processor.h>
>
>  #include <asm/ptrace.h>
> +#include <asm/insn-def.h>
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
>
>  #ifdef CONFIG_64BIT
>  #define DEFAULT_MAP_WINDOW     (UL(1) << (MMAP_VA_BITS - 1))
> @@ -106,6 +109,19 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
>  #define KSTK_EIP(tsk)          (task_pt_regs(tsk)->epc)
>  #define KSTK_ESP(tsk)          (task_pt_regs(tsk)->sp)
>
> +#ifdef CONFIG_RISCV_ISA_ZICBOP
> +#define ARCH_HAS_PREFETCHW
> +
> +#define PREFETCHW_ASM(x)                                               \
> +       ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,                 \
> +                   RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
The PREFETCHW_ASM(x) definition should be moved outside the "#ifdef
CONFIG_RISCV_ISA_ZICBOP ... #endif" block, because xchg_small may use this
macro even when CONFIG_RISCV_ISA_ZICBOP is not enabled.

> +
> +
> +static inline void prefetchw(const void *x)
> +{
> +       __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
> +}
> +#endif /* CONFIG_RISCV_ISA_ZICBOP */
>
>  /* Do necessary setup to start up a newly executed thread. */
>  extern void start_thread(struct pt_regs *regs,
> --
> 2.40.1
>
Andrew Jones Jan. 2, 2024, 10:45 a.m. UTC | #2
s/Zibop/Zicbop/ <<<$SUBJECT

On Sun, Dec 31, 2023 at 03:29:52AM -0500, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
> 
> Enable Linux prefetchw primitive with Zibop cpufeature, which preloads

Also s/Zibop/Zicbop/ here

> cache line into L1 cache for the next write operation.
> 
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)
> 
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index f19f861cda54..8d3a2ab37678 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -13,6 +13,9 @@
>  #include <vdso/processor.h>
>  
>  #include <asm/ptrace.h>
> +#include <asm/insn-def.h>
> +#include <asm/alternative-macros.h>
> +#include <asm/hwcap.h>
>  
>  #ifdef CONFIG_64BIT
>  #define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
> @@ -106,6 +109,19 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
>  #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->epc)
>  #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->sp)
>  
> +#ifdef CONFIG_RISCV_ISA_ZICBOP
> +#define ARCH_HAS_PREFETCHW
> +
> +#define PREFETCHW_ASM(x)						\
> +	ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,			\
> +		    RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
> +
> +
> +static inline void prefetchw(const void *x)
> +{
> +	__asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
> +}

Shouldn't we create an interface which exposes the offset input of
the instruction, allowing a sequence of calls to be unrolled? But
I guess that could be put off until there's a need for it.

> +#endif /* CONFIG_RISCV_ISA_ZICBOP */
>  
>  /* Do necessary setup to start up a newly executed thread. */
>  extern void start_thread(struct pt_regs *regs,
> -- 
> 2.40.1
> 

Thanks,
drew
Guo Ren Jan. 3, 2024, 6:19 a.m. UTC | #3
On Tue, Jan 2, 2024 at 6:45 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
>
> s/Zibop/Zicbop/ <<<$SUBJECT
okay

>
> On Sun, Dec 31, 2023 at 03:29:52AM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > Enable Linux prefetchw primitive with Zibop cpufeature, which preloads
>
> Also s/Zibop/Zicbop/ here
okay, thx.

>
> > cache line into L1 cache for the next write operation.
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> >
> > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > index f19f861cda54..8d3a2ab37678 100644
> > --- a/arch/riscv/include/asm/processor.h
> > +++ b/arch/riscv/include/asm/processor.h
> > @@ -13,6 +13,9 @@
> >  #include <vdso/processor.h>
> >
> >  #include <asm/ptrace.h>
> > +#include <asm/insn-def.h>
> > +#include <asm/alternative-macros.h>
> > +#include <asm/hwcap.h>
> >
> >  #ifdef CONFIG_64BIT
> >  #define DEFAULT_MAP_WINDOW   (UL(1) << (MMAP_VA_BITS - 1))
> > @@ -106,6 +109,19 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
> >  #define KSTK_EIP(tsk)                (task_pt_regs(tsk)->epc)
> >  #define KSTK_ESP(tsk)                (task_pt_regs(tsk)->sp)
> >
> > +#ifdef CONFIG_RISCV_ISA_ZICBOP
> > +#define ARCH_HAS_PREFETCHW
> > +
> > +#define PREFETCHW_ASM(x)                                             \
> > +     ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,                 \
> > +                 RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
> > +
> > +
> > +static inline void prefetchw(const void *x)
> > +{
> > +     __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
> > +}
>
> Shouldn't we create an interface which exposes the offset input of
> the instruction, allowing a sequence of calls to be unrolled? But
> I guess that could be put off until there's a need for it.
I want to put it off until there's a user. Let's keep the whole
imm[11:0] field zero for now.

>
> > +#endif /* CONFIG_RISCV_ISA_ZICBOP */
> >
> >  /* Do necessary setup to start up a newly executed thread. */
> >  extern void start_thread(struct pt_regs *regs,
> > --
> > 2.40.1
> >
>
> Thanks,
> drew
Leonardo Bras Jan. 3, 2024, 7:04 p.m. UTC | #4
On Mon, Jan 01, 2024 at 10:29:21AM +0800, Guo Ren wrote:
> On Sun, Dec 31, 2023 at 4:30 PM <guoren@kernel.org> wrote:
> >
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > Enable Linux prefetchw primitive with Zibop cpufeature, which preloads
> > cache line into L1 cache for the next write operation.
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> >
> > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > index f19f861cda54..8d3a2ab37678 100644
> > --- a/arch/riscv/include/asm/processor.h
> > +++ b/arch/riscv/include/asm/processor.h
> > @@ -13,6 +13,9 @@
> >  #include <vdso/processor.h>
> >
> >  #include <asm/ptrace.h>
> > +#include <asm/insn-def.h>
> > +#include <asm/alternative-macros.h>
> > +#include <asm/hwcap.h>
> >
> >  #ifdef CONFIG_64BIT
> >  #define DEFAULT_MAP_WINDOW     (UL(1) << (MMAP_VA_BITS - 1))
> > @@ -106,6 +109,19 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
> >  #define KSTK_EIP(tsk)          (task_pt_regs(tsk)->epc)
> >  #define KSTK_ESP(tsk)          (task_pt_regs(tsk)->sp)
> >
> > +#ifdef CONFIG_RISCV_ISA_ZICBOP
> > +#define ARCH_HAS_PREFETCHW
> > +
> > +#define PREFETCHW_ASM(x)                                               \
> > +       ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,                 \
> > +                   RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
> The PREFETCHW_ASM(x) definition should be out of "ifdef
> CONFIG_RISCV_ISA_ZICBOP... #endif", because xchg_small may use this
> macro without CONFIG_RISCV_ISA_ZICBOP.
> 

Agree :)

> > +
> > +
> > +static inline void prefetchw(const void *x)
> > +{
> > +       __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
> > +}
> > +#endif /* CONFIG_RISCV_ISA_ZICBOP */
> >
> >  /* Do necessary setup to start up a newly executed thread. */
> >  extern void start_thread(struct pt_regs *regs,
> > --
> > 2.40.1
> >
> 
> 
> -- 
> Best Regards
>  Guo Ren
>
Andrew Jones Jan. 3, 2024, 7:56 p.m. UTC | #5
On Wed, Jan 03, 2024 at 02:19:49PM +0800, Guo Ren wrote:
> On Tue, Jan 2, 2024 at 6:45 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> >
> > s/Zibop/Zicbop/ <<<$SUBJECT
> okay
> 
> >
> > On Sun, Dec 31, 2023 at 03:29:52AM -0500, guoren@kernel.org wrote:
> > > From: Guo Ren <guoren@linux.alibaba.com>
> > >
> > > Enable Linux prefetchw primitive with Zibop cpufeature, which preloads
> >
> > Also s/Zibop/Zicbop/ here
> okay, thx.
> 
> >
> > > cache line into L1 cache for the next write operation.
> > >
> > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > ---
> > >  arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
> > >  1 file changed, 16 insertions(+)
> > >
> > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > > index f19f861cda54..8d3a2ab37678 100644
> > > --- a/arch/riscv/include/asm/processor.h
> > > +++ b/arch/riscv/include/asm/processor.h
> > > @@ -13,6 +13,9 @@
> > >  #include <vdso/processor.h>
> > >
> > >  #include <asm/ptrace.h>
> > > +#include <asm/insn-def.h>
> > > +#include <asm/alternative-macros.h>
> > > +#include <asm/hwcap.h>
> > >
> > >  #ifdef CONFIG_64BIT
> > >  #define DEFAULT_MAP_WINDOW   (UL(1) << (MMAP_VA_BITS - 1))
> > > @@ -106,6 +109,19 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
> > >  #define KSTK_EIP(tsk)                (task_pt_regs(tsk)->epc)
> > >  #define KSTK_ESP(tsk)                (task_pt_regs(tsk)->sp)
> > >
> > > +#ifdef CONFIG_RISCV_ISA_ZICBOP
> > > +#define ARCH_HAS_PREFETCHW
> > > +
> > > +#define PREFETCHW_ASM(x)                                             \
> > > +     ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,                 \
> > > +                 RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
> > > +
> > > +
> > > +static inline void prefetchw(const void *x)
> > > +{
> > > +     __asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
> > > +}
> >
> > Shouldn't we create an interface which exposes the offset input of
> > the instruction, allowing a sequence of calls to be unrolled? But
> > I guess that could be put off until there's a need for it.
> I want to put it off until there's a user. Let's keep the whole
> imm[11:0] zero for the current.

Yeah, my suggestion didn't make sense in this context anyway since we need
to match the interface in linux/prefetch.h. Considering linux/prefetch.h,
is there some reason we don't also add prefetch() at the same time?

Thanks,
drew

> 
> >
> > > +#endif /* CONFIG_RISCV_ISA_ZICBOP */
> > >
> > >  /* Do necessary setup to start up a newly executed thread. */
> > >  extern void start_thread(struct pt_regs *regs,
> > > --
> > > 2.40.1
> > >
> >
> > Thanks,
> > drew
> 
> 
> 
> -- 
> Best Regards
>  Guo Ren
Andrew Jones Jan. 5, 2024, 1:31 p.m. UTC | #6
On Tue, Jan 02, 2024 at 11:45:08AM +0100, Andrew Jones wrote:
> 
> s/Zibop/Zicbop/ <<<$SUBJECT
> 
> On Sun, Dec 31, 2023 at 03:29:52AM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> > 
> > Enable Linux prefetchw primitive with Zibop cpufeature, which preloads
> 
> Also s/Zibop/Zicbop/ here
> 
> > cache line into L1 cache for the next write operation.
> > 
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/include/asm/processor.h | 16 ++++++++++++++++
> >  1 file changed, 16 insertions(+)
> > 
> > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> > index f19f861cda54..8d3a2ab37678 100644
> > --- a/arch/riscv/include/asm/processor.h
> > +++ b/arch/riscv/include/asm/processor.h
> > @@ -13,6 +13,9 @@
> >  #include <vdso/processor.h>
> >  
> >  #include <asm/ptrace.h>
> > +#include <asm/insn-def.h>
> > +#include <asm/alternative-macros.h>
> > +#include <asm/hwcap.h>
> >  
> >  #ifdef CONFIG_64BIT
> >  #define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
> > @@ -106,6 +109,19 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
> >  #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->epc)
> >  #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->sp)
> >  
> > +#ifdef CONFIG_RISCV_ISA_ZICBOP
> > +#define ARCH_HAS_PREFETCHW
> > +
> > +#define PREFETCHW_ASM(x)						\
> > +	ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,			\
> > +		    RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
> > +
> > +
> > +static inline void prefetchw(const void *x)
> > +{
> > +	__asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
> > +}
> 
> Shouldn't we create an interface which exposes the offset input of
> the instruction, allowing a sequence of calls to be unrolled? But
> I guess that could be put off until there's a need for it.

If we did expose the offset, then, because it must be a constant and must
only have bits 5-11 set, we could add a static assert. Something like

 #define prefetchw_offset(base, offset) \
 ({ \
     static_assert(__builtin_constant_p(offset) && !(offset & ~GENMASK(11, 5))); \
     __asm__ __volatile__(PREFETCHW_ASM(%0, %1) : : "r" (base), "I" (offset) : "memory"); \
 })

Probably overkill though...

Thanks,
drew
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f19f861cda54..8d3a2ab37678 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -13,6 +13,9 @@ 
 #include <vdso/processor.h>
 
 #include <asm/ptrace.h>
+#include <asm/insn-def.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
 
 #ifdef CONFIG_64BIT
 #define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
@@ -106,6 +109,19 @@  static inline void arch_thread_struct_whitelist(unsigned long *offset,
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->epc)
 #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->sp)
 
+#ifdef CONFIG_RISCV_ISA_ZICBOP
+#define ARCH_HAS_PREFETCHW
+
+#define PREFETCHW_ASM(x)						\
+	ALTERNATIVE(__nops(1), CBO_PREFETCH_W(x, 0), 0,			\
+		    RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+
+static inline void prefetchw(const void *x)
+{
+	__asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
+}
+#endif /* CONFIG_RISCV_ISA_ZICBOP */
 
 /* Do necessary setup to start up a newly executed thread. */
 extern void start_thread(struct pt_regs *regs,