diff mbox series

[V11,09/17] riscv: qspinlock: errata: Add ERRATA_THEAD_WRITE_ONCE fixup

Message ID 20230910082911.3378782-10-guoren@kernel.org (mailing list archive)
State Changes Requested
Headers show
Series riscv: Add Native/Paravirt qspinlock support | expand

Checks

Context Check Description
conchuod/tree_selection fail Failed to apply to next/pending-fixes, riscv/for-next or riscv/master

Commit Message

Guo Ren Sept. 10, 2023, 8:29 a.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

The early version of T-Head C9xx cores has a store merge buffer
delay problem. The store merge buffer could improve the store queue
performance by merging multi-store requests, but when there are not
continued store requests, the prior single store request would be
waiting in the store queue for a long time. That would cause
significant problems for communication between multi-cores. This
problem was found on sg2042 & th1520 platforms with the qspinlock
lock torture test.

So appending a fence w.o could immediately flush the store merge
buffer and let other cores see the write result.

This will apply the WRITE_ONCE errata to handle the non-standard
behavior via appending a fence w.o instruction for WRITE_ONCE().

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/Kconfig.errata              | 19 +++++++++++++++++++
 arch/riscv/errata/thead/errata.c       | 20 ++++++++++++++++++++
 arch/riscv/include/asm/errata_list.h   | 13 -------------
 arch/riscv/include/asm/rwonce.h        | 24 ++++++++++++++++++++++++
 arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++
 include/asm-generic/rwonce.h           |  2 ++
 6 files changed, 79 insertions(+), 13 deletions(-)
 create mode 100644 arch/riscv/include/asm/rwonce.h

Comments

Leonardo Bras Sept. 14, 2023, 8:32 a.m. UTC | #1
On Sun, Sep 10, 2023 at 04:29:03AM -0400, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
> 
> The early version of T-Head C9xx cores has a store merge buffer
> delay problem. The store merge buffer could improve the store queue
> performance by merging multi-store requests, but when there are not
> continued store requests, the prior single store request would be
> waiting in the store queue for a long time. That would cause
> significant problems for communication between multi-cores. This
> problem was found on sg2042 & th1520 platforms with the qspinlock
> lock torture test.
> 
> So appending a fence w.o could immediately flush the store merge
> buffer and let other cores see the write result.
> 
> This will apply the WRITE_ONCE errata to handle the non-standard
> behavior via appending a fence w.o instruction for WRITE_ONCE().
> 
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/Kconfig.errata              | 19 +++++++++++++++++++
>  arch/riscv/errata/thead/errata.c       | 20 ++++++++++++++++++++
>  arch/riscv/include/asm/errata_list.h   | 13 -------------
>  arch/riscv/include/asm/rwonce.h        | 24 ++++++++++++++++++++++++
>  arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++
>  include/asm-generic/rwonce.h           |  2 ++
>  6 files changed, 79 insertions(+), 13 deletions(-)
>  create mode 100644 arch/riscv/include/asm/rwonce.h
> 
> diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> index 1aa85a427ff3..c919cc3f1a3a 100644
> --- a/arch/riscv/Kconfig.errata
> +++ b/arch/riscv/Kconfig.errata
> @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU
>  
>  	  If you don't know what to do here, say "Y".
>  
> +config ERRATA_THEAD_WRITE_ONCE
> +	bool "Apply T-Head WRITE_ONCE errata"
> +	depends on ERRATA_THEAD
> +	default y
> +	help
> +	  The early version of T-Head C9xx cores has a store merge buffer
> +	  delay problem. The store merge buffer could improve the store queue
> +	  performance by merging multi-store requests, but when there are no
> +	  continued store requests, the prior single store request would be
> +	  waiting in the store queue for a long time. That would cause
> +	  significant problems for communication between multi-cores. Appending
> +	  a fence w.o could immediately flush the store merge buffer and let
> +	  other cores see the write result.
> +
> +	  This will apply the WRITE_ONCE errata to handle the non-standard
> +	  behavior via appending a fence w.o instruction for WRITE_ONCE().
> +
> +	  If you don't know what to do here, say "Y".
> +
>  endmenu # "CPU errata selection"
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index be84b14f0118..751eb5a7f614 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage,
>  	return true;
>  }
>  
> +static bool errata_probe_write_once(unsigned int stage,
> +				    unsigned long arch_id, unsigned long impid)
> +{
> +	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
> +		return false;
> +
> +	/* target-c9xx cores report arch_id and impid as 0 */
> +	if (arch_id != 0 || impid != 0)
> +		return false;
> +
> +	if (stage == RISCV_ALTERNATIVES_BOOT ||
> +	    stage == RISCV_ALTERNATIVES_MODULE)
> +		return true;
> +
> +	return false;
> +}
> +
>  static u32 thead_errata_probe(unsigned int stage,
>  			      unsigned long archid, unsigned long impid)
>  {
> @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage,
>  	if (errata_probe_pmu(stage, archid, impid))
>  		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
>  
> +	if (errata_probe_write_once(stage, archid, impid))
> +		cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
> +
>  	return cpu_req_errata;
>  }
>  
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 712cab7adffe..fbb2b8d39321 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -11,19 +11,6 @@
>  #include <asm/hwcap.h>
>  #include <asm/vendorid_list.h>
>  
> -#ifdef CONFIG_ERRATA_SIFIVE
> -#define	ERRATA_SIFIVE_CIP_453 0
> -#define	ERRATA_SIFIVE_CIP_1200 1
> -#define	ERRATA_SIFIVE_NUMBER 2
> -#endif
> -
> -#ifdef CONFIG_ERRATA_THEAD
> -#define	ERRATA_THEAD_PBMT 0
> -#define	ERRATA_THEAD_CMO 1
> -#define	ERRATA_THEAD_PMU 2
> -#define	ERRATA_THEAD_NUMBER 3
> -#endif
> -

Here I understand you are moving stuff from errata_list.h to 
vendorid_list.h. Wouldn't it be better to do this on a separated patch 
before this one?

I understand this is used here, but it looks like it's unrelated.

>  #ifdef __ASSEMBLY__
>  
>  #define ALT_INSN_FAULT(x)						\
> diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
> new file mode 100644
> index 000000000000..be0b8864969d
> --- /dev/null
> +++ b/arch/riscv/include/asm/rwonce.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_RWONCE_H
> +#define __ASM_RWONCE_H
> +
> +#include <linux/compiler_types.h>
> +#include <asm/alternative-macros.h>
> +#include <asm/vendorid_list.h>
> +
> +#define __WRITE_ONCE(x, val)				\
> +do {							\
> +	*(volatile typeof(x) *)&(x) = (val);		\
> +	asm volatile(ALTERNATIVE(			\
> +		__nops(1),				\
> +		"fence w, o\n\t",			\
> +		THEAD_VENDOR_ID,			\
> +		ERRATA_THEAD_WRITE_ONCE,		\
> +		CONFIG_ERRATA_THEAD_WRITE_ONCE)		\
> +		: : : "memory");			\
> +} while (0)
> +
> +#include <asm-generic/rwonce.h>
> +
> +#endif	/* __ASM_RWONCE_H */

IIUC the idea here is to have an alternative __WRITE_ONCE that replaces the 
asm-generic one.

Honestly, this asm alternative here seems too much information, and too 
cryptic. I mean, yeah in the patch it all makes sense, but I imagine myself
in the future looking at all this and trying to understand what is going 
on.

Wouldn't it look better to have something like:

#####

/* Some explanation like the one on Kconfig */

#define write_once_flush()			\
do {						\
	asm volatile(ALTERNATIVE(			\
		__nops(1),			\
		"fence w, o\n\t",		\
		THEAD_VENDOR_ID,		\
		ERRATA_THEAD_WRITE_ONCE,	\
		CONFIG_ERRATA_THEAD_WRITE_ONCE)	\
		: : : "memory");		\
} while(0)


#define __WRITE_ONCE(x, val)			\
do {						\
     	*(volatile typeof(x) *)&(x) = (val);	\
	write_once_flush();			\
} while(0)

#####

	
This way I could quickly see there is a flush after the writting of 
WRITE_ONCE(), and this flush is the above "complicated" asm.

What do you think?

> diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
> index cb89af3f0704..73078cfe4029 100644
> --- a/arch/riscv/include/asm/vendorid_list.h
> +++ b/arch/riscv/include/asm/vendorid_list.h
> @@ -8,4 +8,18 @@
>  #define SIFIVE_VENDOR_ID	0x489
>  #define THEAD_VENDOR_ID		0x5b7
>  
> +#ifdef CONFIG_ERRATA_SIFIVE
> +#define	ERRATA_SIFIVE_CIP_453 0
> +#define	ERRATA_SIFIVE_CIP_1200 1
> +#define	ERRATA_SIFIVE_NUMBER 2
> +#endif
> +
> +#ifdef CONFIG_ERRATA_THEAD
> +#define	ERRATA_THEAD_PBMT 0
> +#define	ERRATA_THEAD_CMO 1
> +#define	ERRATA_THEAD_PMU 2
> +#define	ERRATA_THEAD_WRITE_ONCE 3
> +#define	ERRATA_THEAD_NUMBER 4
> +#endif
> +
>  #endif
> diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
> index 8d0a6280e982..fb07fe8c6e45 100644
> --- a/include/asm-generic/rwonce.h
> +++ b/include/asm-generic/rwonce.h
> @@ -50,10 +50,12 @@
>  	__READ_ONCE(x);							\
>  })
>  
> +#ifndef __WRITE_ONCE
>  #define __WRITE_ONCE(x, val)						\
>  do {									\
>  	*(volatile typeof(x) *)&(x) = (val);				\
>  } while (0)
> +#endif
>  
>  #define WRITE_ONCE(x, val)						\
>  do {									\
> -- 
> 2.36.1
>
Guo Ren Sept. 17, 2023, 3:15 p.m. UTC | #2
On Thu, Sep 14, 2023 at 4:32 PM Leonardo Bras <leobras@redhat.com> wrote:
>
> On Sun, Sep 10, 2023 at 04:29:03AM -0400, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > The early version of T-Head C9xx cores has a store merge buffer
> > delay problem. The store merge buffer could improve the store queue
> > performance by merging multi-store requests, but when there are not
> > continued store requests, the prior single store request would be
> > waiting in the store queue for a long time. That would cause
> > significant problems for communication between multi-cores. This
> > problem was found on sg2042 & th1520 platforms with the qspinlock
> > lock torture test.
> >
> > So appending a fence w.o could immediately flush the store merge
> > buffer and let other cores see the write result.
> >
> > This will apply the WRITE_ONCE errata to handle the non-standard
> > behavior via appending a fence w.o instruction for WRITE_ONCE().
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/Kconfig.errata              | 19 +++++++++++++++++++
> >  arch/riscv/errata/thead/errata.c       | 20 ++++++++++++++++++++
> >  arch/riscv/include/asm/errata_list.h   | 13 -------------
> >  arch/riscv/include/asm/rwonce.h        | 24 ++++++++++++++++++++++++
> >  arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++
> >  include/asm-generic/rwonce.h           |  2 ++
> >  6 files changed, 79 insertions(+), 13 deletions(-)
> >  create mode 100644 arch/riscv/include/asm/rwonce.h
> >
> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> > index 1aa85a427ff3..c919cc3f1a3a 100644
> > --- a/arch/riscv/Kconfig.errata
> > +++ b/arch/riscv/Kconfig.errata
> > @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU
> >
> >         If you don't know what to do here, say "Y".
> >
> > +config ERRATA_THEAD_WRITE_ONCE
> > +     bool "Apply T-Head WRITE_ONCE errata"
> > +     depends on ERRATA_THEAD
> > +     default y
> > +     help
> > +       The early version of T-Head C9xx cores has a store merge buffer
> > +       delay problem. The store merge buffer could improve the store queue
> > +       performance by merging multi-store requests, but when there are no
> > +       continued store requests, the prior single store request would be
> > +       waiting in the store queue for a long time. That would cause
> > +       significant problems for communication between multi-cores. Appending
> > +       a fence w.o could immediately flush the store merge buffer and let
> > +       other cores see the write result.
> > +
> > +       This will apply the WRITE_ONCE errata to handle the non-standard
> > +       behavior via appending a fence w.o instruction for WRITE_ONCE().
> > +
> > +       If you don't know what to do here, say "Y".
> > +
> >  endmenu # "CPU errata selection"
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index be84b14f0118..751eb5a7f614 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage,
> >       return true;
> >  }
> >
> > +static bool errata_probe_write_once(unsigned int stage,
> > +                                 unsigned long arch_id, unsigned long impid)
> > +{
> > +     if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
> > +             return false;
> > +
> > +     /* target-c9xx cores report arch_id and impid as 0 */
> > +     if (arch_id != 0 || impid != 0)
> > +             return false;
> > +
> > +     if (stage == RISCV_ALTERNATIVES_BOOT ||
> > +         stage == RISCV_ALTERNATIVES_MODULE)
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> >  static u32 thead_errata_probe(unsigned int stage,
> >                             unsigned long archid, unsigned long impid)
> >  {
> > @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage,
> >       if (errata_probe_pmu(stage, archid, impid))
> >               cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
> >
> > +     if (errata_probe_write_once(stage, archid, impid))
> > +             cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
> > +
> >       return cpu_req_errata;
> >  }
> >
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index 712cab7adffe..fbb2b8d39321 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -11,19 +11,6 @@
> >  #include <asm/hwcap.h>
> >  #include <asm/vendorid_list.h>
> >
> > -#ifdef CONFIG_ERRATA_SIFIVE
> > -#define      ERRATA_SIFIVE_CIP_453 0
> > -#define      ERRATA_SIFIVE_CIP_1200 1
> > -#define      ERRATA_SIFIVE_NUMBER 2
> > -#endif
> > -
> > -#ifdef CONFIG_ERRATA_THEAD
> > -#define      ERRATA_THEAD_PBMT 0
> > -#define      ERRATA_THEAD_CMO 1
> > -#define      ERRATA_THEAD_PMU 2
> > -#define      ERRATA_THEAD_NUMBER 3
> > -#endif
> > -
>
> Here I understand you are moving stuff from errata_list.h to
> vendorid_list.h. Wouldn't it be better to do this on a separated patch
> before this one?
Okay.

>
> I understand this is used here, but it looks like it's unrelated.
>
> >  #ifdef __ASSEMBLY__
> >
> >  #define ALT_INSN_FAULT(x)                                            \
> > diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
> > new file mode 100644
> > index 000000000000..be0b8864969d
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/rwonce.h
> > @@ -0,0 +1,24 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef __ASM_RWONCE_H
> > +#define __ASM_RWONCE_H
> > +
> > +#include <linux/compiler_types.h>
> > +#include <asm/alternative-macros.h>
> > +#include <asm/vendorid_list.h>
> > +
> > +#define __WRITE_ONCE(x, val)                         \
> > +do {                                                 \
> > +     *(volatile typeof(x) *)&(x) = (val);            \
> > +     asm volatile(ALTERNATIVE(                       \
> > +             __nops(1),                              \
> > +             "fence w, o\n\t",                       \
> > +             THEAD_VENDOR_ID,                        \
> > +             ERRATA_THEAD_WRITE_ONCE,                \
> > +             CONFIG_ERRATA_THEAD_WRITE_ONCE)         \
> > +             : : : "memory");                        \
> > +} while (0)
> > +
> > +#include <asm-generic/rwonce.h>
> > +
> > +#endif       /* __ASM_RWONCE_H */
>
> IIUC the idea here is to have an alternative __WRITE_ONCE that replaces the
> asm-generic one.
>
> Honestly, this asm alternative here seems too much information, and too
> cryptic. I mean, yeah in the patch it all makes sense, but I imagine myself
> in the future looking at all this and trying to understand what is going
> on.
>
> Wouldn't it look better to have something like:
>
> #####
>
> /* Some explanation like the one on Kconfig */
>
> #define write_once_flush()                      \
> do {                                            \
>         asm volatile(ALTERNATIVE(                       \
>                 __nops(1),                      \
>                 "fence w, o\n\t",               \
>                 THEAD_VENDOR_ID,                \
>                 ERRATA_THEAD_WRITE_ONCE,        \
>                 CONFIG_ERRATA_THEAD_WRITE_ONCE) \
>                 : : : "memory");                \
> } while(0)
>
>
> #define __WRITE_ONCE(x, val)                    \
> do {                                            \
>         *(volatile typeof(x) *)&(x) = (val);    \
>         write_once_flush();                     \
> } while(0)
>
> #####
>
>
> This way I could quickly see there is a flush after the writting of
> WRITE_ONCE(), and this flush is the above "complicated" asm.
>
> What do you think?
Okay, good point, and I would take it.

>
> > diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
> > index cb89af3f0704..73078cfe4029 100644
> > --- a/arch/riscv/include/asm/vendorid_list.h
> > +++ b/arch/riscv/include/asm/vendorid_list.h
> > @@ -8,4 +8,18 @@
> >  #define SIFIVE_VENDOR_ID     0x489
> >  #define THEAD_VENDOR_ID              0x5b7
> >
> > +#ifdef CONFIG_ERRATA_SIFIVE
> > +#define      ERRATA_SIFIVE_CIP_453 0
> > +#define      ERRATA_SIFIVE_CIP_1200 1
> > +#define      ERRATA_SIFIVE_NUMBER 2
> > +#endif
> > +
> > +#ifdef CONFIG_ERRATA_THEAD
> > +#define      ERRATA_THEAD_PBMT 0
> > +#define      ERRATA_THEAD_CMO 1
> > +#define      ERRATA_THEAD_PMU 2
> > +#define      ERRATA_THEAD_WRITE_ONCE 3
> > +#define      ERRATA_THEAD_NUMBER 4
> > +#endif
> > +
> >  #endif
> > diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
> > index 8d0a6280e982..fb07fe8c6e45 100644
> > --- a/include/asm-generic/rwonce.h
> > +++ b/include/asm-generic/rwonce.h
> > @@ -50,10 +50,12 @@
> >       __READ_ONCE(x);                                                 \
> >  })
> >
> > +#ifndef __WRITE_ONCE
> >  #define __WRITE_ONCE(x, val)                                         \
> >  do {                                                                 \
> >       *(volatile typeof(x) *)&(x) = (val);                            \
> >  } while (0)
> > +#endif
> >
> >  #define WRITE_ONCE(x, val)                                           \
> >  do {                                                                 \
> > --
> > 2.36.1
> >
>
Leonardo Bras Sept. 19, 2023, 5:34 a.m. UTC | #3
On Sun, Sep 17, 2023 at 11:15:51PM +0800, Guo Ren wrote:
> On Thu, Sep 14, 2023 at 4:32 PM Leonardo Bras <leobras@redhat.com> wrote:
> >
> > On Sun, Sep 10, 2023 at 04:29:03AM -0400, guoren@kernel.org wrote:
> > > From: Guo Ren <guoren@linux.alibaba.com>
> > >
> > > The early version of T-Head C9xx cores has a store merge buffer
> > > delay problem. The store merge buffer could improve the store queue
> > > performance by merging multi-store requests, but when there are not
> > > continued store requests, the prior single store request would be
> > > waiting in the store queue for a long time. That would cause
> > > significant problems for communication between multi-cores. This
> > > problem was found on sg2042 & th1520 platforms with the qspinlock
> > > lock torture test.
> > >
> > > So appending a fence w.o could immediately flush the store merge
> > > buffer and let other cores see the write result.
> > >
> > > This will apply the WRITE_ONCE errata to handle the non-standard
> > > behavior via appending a fence w.o instruction for WRITE_ONCE().
> > >
> > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > ---
> > >  arch/riscv/Kconfig.errata              | 19 +++++++++++++++++++
> > >  arch/riscv/errata/thead/errata.c       | 20 ++++++++++++++++++++
> > >  arch/riscv/include/asm/errata_list.h   | 13 -------------
> > >  arch/riscv/include/asm/rwonce.h        | 24 ++++++++++++++++++++++++
> > >  arch/riscv/include/asm/vendorid_list.h | 14 ++++++++++++++
> > >  include/asm-generic/rwonce.h           |  2 ++
> > >  6 files changed, 79 insertions(+), 13 deletions(-)
> > >  create mode 100644 arch/riscv/include/asm/rwonce.h
> > >
> > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> > > index 1aa85a427ff3..c919cc3f1a3a 100644
> > > --- a/arch/riscv/Kconfig.errata
> > > +++ b/arch/riscv/Kconfig.errata
> > > @@ -77,4 +77,23 @@ config ERRATA_THEAD_PMU
> > >
> > >         If you don't know what to do here, say "Y".
> > >
> > > +config ERRATA_THEAD_WRITE_ONCE
> > > +     bool "Apply T-Head WRITE_ONCE errata"
> > > +     depends on ERRATA_THEAD
> > > +     default y
> > > +     help
> > > +       The early version of T-Head C9xx cores has a store merge buffer
> > > +       delay problem. The store merge buffer could improve the store queue
> > > +       performance by merging multi-store requests, but when there are no
> > > +       continued store requests, the prior single store request would be
> > > +       waiting in the store queue for a long time. That would cause
> > > +       significant problems for communication between multi-cores. Appending
> > > +       a fence w.o could immediately flush the store merge buffer and let
> > > +       other cores see the write result.
> > > +
> > > +       This will apply the WRITE_ONCE errata to handle the non-standard
> > > +       behavior via appending a fence w.o instruction for WRITE_ONCE().
> > > +
> > > +       If you don't know what to do here, say "Y".
> > > +
> > >  endmenu # "CPU errata selection"
> > > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > > index be84b14f0118..751eb5a7f614 100644
> > > --- a/arch/riscv/errata/thead/errata.c
> > > +++ b/arch/riscv/errata/thead/errata.c
> > > @@ -69,6 +69,23 @@ static bool errata_probe_pmu(unsigned int stage,
> > >       return true;
> > >  }
> > >
> > > +static bool errata_probe_write_once(unsigned int stage,
> > > +                                 unsigned long arch_id, unsigned long impid)
> > > +{
> > > +     if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
> > > +             return false;
> > > +
> > > +     /* target-c9xx cores report arch_id and impid as 0 */
> > > +     if (arch_id != 0 || impid != 0)
> > > +             return false;
> > > +
> > > +     if (stage == RISCV_ALTERNATIVES_BOOT ||
> > > +         stage == RISCV_ALTERNATIVES_MODULE)
> > > +             return true;
> > > +
> > > +     return false;
> > > +}
> > > +
> > >  static u32 thead_errata_probe(unsigned int stage,
> > >                             unsigned long archid, unsigned long impid)
> > >  {
> > > @@ -83,6 +100,9 @@ static u32 thead_errata_probe(unsigned int stage,
> > >       if (errata_probe_pmu(stage, archid, impid))
> > >               cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
> > >
> > > +     if (errata_probe_write_once(stage, archid, impid))
> > > +             cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
> > > +
> > >       return cpu_req_errata;
> > >  }
> > >
> > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > > index 712cab7adffe..fbb2b8d39321 100644
> > > --- a/arch/riscv/include/asm/errata_list.h
> > > +++ b/arch/riscv/include/asm/errata_list.h
> > > @@ -11,19 +11,6 @@
> > >  #include <asm/hwcap.h>
> > >  #include <asm/vendorid_list.h>
> > >
> > > -#ifdef CONFIG_ERRATA_SIFIVE
> > > -#define      ERRATA_SIFIVE_CIP_453 0
> > > -#define      ERRATA_SIFIVE_CIP_1200 1
> > > -#define      ERRATA_SIFIVE_NUMBER 2
> > > -#endif
> > > -
> > > -#ifdef CONFIG_ERRATA_THEAD
> > > -#define      ERRATA_THEAD_PBMT 0
> > > -#define      ERRATA_THEAD_CMO 1
> > > -#define      ERRATA_THEAD_PMU 2
> > > -#define      ERRATA_THEAD_NUMBER 3
> > > -#endif
> > > -
> >
> > Here I understand you are moving stuff from errata_list.h to
> > vendorid_list.h. Wouldn't it be better to do this on a separated patch
> > before this one?
> Okay.
> 
> >
> > I understand this is used here, but it looks like it's unrelated.
> >
> > >  #ifdef __ASSEMBLY__
> > >
> > >  #define ALT_INSN_FAULT(x)                                            \
> > > diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
> > > new file mode 100644
> > > index 000000000000..be0b8864969d
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/rwonce.h
> > > @@ -0,0 +1,24 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +
> > > +#ifndef __ASM_RWONCE_H
> > > +#define __ASM_RWONCE_H
> > > +
> > > +#include <linux/compiler_types.h>
> > > +#include <asm/alternative-macros.h>
> > > +#include <asm/vendorid_list.h>
> > > +
> > > +#define __WRITE_ONCE(x, val)                         \
> > > +do {                                                 \
> > > +     *(volatile typeof(x) *)&(x) = (val);            \
> > > +     asm volatile(ALTERNATIVE(                       \
> > > +             __nops(1),                              \
> > > +             "fence w, o\n\t",                       \
> > > +             THEAD_VENDOR_ID,                        \
> > > +             ERRATA_THEAD_WRITE_ONCE,                \
> > > +             CONFIG_ERRATA_THEAD_WRITE_ONCE)         \
> > > +             : : : "memory");                        \
> > > +} while (0)
> > > +
> > > +#include <asm-generic/rwonce.h>
> > > +
> > > +#endif       /* __ASM_RWONCE_H */
> >
> > IIUC the idea here is to have an alternative __WRITE_ONCE that replaces the
> > asm-generic one.
> >
> > Honestly, this asm alternative here seems too much information, and too
> > cryptic. I mean, yeah in the patch it all makes sense, but I imagine myself
> > in the future looking at all this and trying to understand what is going
> > on.
> >
> > Wouldn't it look better to have something like:
> >
> > #####
> >
> > /* Some explanation like the one on Kconfig */
> >
> > #define write_once_flush()                      \
> > do {                                            \
> >         asm volatile(ALTERNATIVE(                       \
> >                 __nops(1),                      \
> >                 "fence w, o\n\t",               \
> >                 THEAD_VENDOR_ID,                \
> >                 ERRATA_THEAD_WRITE_ONCE,        \
> >                 CONFIG_ERRATA_THEAD_WRITE_ONCE) \
> >                 : : : "memory");                \
> > } while(0)
> >
> >
> > #define __WRITE_ONCE(x, val)                    \
> > do {                                            \
> >         *(volatile typeof(x) *)&(x) = (val);    \
> >         write_once_flush();                     \
> > } while(0)
> >
> > #####
> >
> >
> > This way I could quickly see there is a flush after the writting of
> > WRITE_ONCE(), and this flush is the above "complicated" asm.
> >
> > What do you think?
> Okay, good point, and I would take it.

Thanks!

Once you take the above suggestions, please include in your next patch:

Reviewed-by: Leonardo Bras <leobras@redhat.com>


> 
> >
> > > diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
> > > index cb89af3f0704..73078cfe4029 100644
> > > --- a/arch/riscv/include/asm/vendorid_list.h
> > > +++ b/arch/riscv/include/asm/vendorid_list.h
> > > @@ -8,4 +8,18 @@
> > >  #define SIFIVE_VENDOR_ID     0x489
> > >  #define THEAD_VENDOR_ID              0x5b7
> > >
> > > +#ifdef CONFIG_ERRATA_SIFIVE
> > > +#define      ERRATA_SIFIVE_CIP_453 0
> > > +#define      ERRATA_SIFIVE_CIP_1200 1
> > > +#define      ERRATA_SIFIVE_NUMBER 2
> > > +#endif
> > > +
> > > +#ifdef CONFIG_ERRATA_THEAD
> > > +#define      ERRATA_THEAD_PBMT 0
> > > +#define      ERRATA_THEAD_CMO 1
> > > +#define      ERRATA_THEAD_PMU 2
> > > +#define      ERRATA_THEAD_WRITE_ONCE 3
> > > +#define      ERRATA_THEAD_NUMBER 4
> > > +#endif
> > > +
> > >  #endif
> > > diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
> > > index 8d0a6280e982..fb07fe8c6e45 100644
> > > --- a/include/asm-generic/rwonce.h
> > > +++ b/include/asm-generic/rwonce.h
> > > @@ -50,10 +50,12 @@
> > >       __READ_ONCE(x);                                                 \
> > >  })
> > >
> > > +#ifndef __WRITE_ONCE
> > >  #define __WRITE_ONCE(x, val)                                         \
> > >  do {                                                                 \
> > >       *(volatile typeof(x) *)&(x) = (val);                            \
> > >  } while (0)
> > > +#endif
> > >
> > >  #define WRITE_ONCE(x, val)                                           \
> > >  do {                                                                 \
> > > --
> > > 2.36.1
> > >
> >
> 
> 
> -- 
> Best Regards
>  Guo Ren
>
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 1aa85a427ff3..c919cc3f1a3a 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -77,4 +77,23 @@  config ERRATA_THEAD_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_WRITE_ONCE
+	bool "Apply T-Head WRITE_ONCE errata"
+	depends on ERRATA_THEAD
+	default y
+	help
+	  The early version of T-Head C9xx cores has a store merge buffer
+	  delay problem. The store merge buffer could improve the store queue
+	  performance by merging multi-store requests, but when there are no
+	  continued store requests, the prior single store request would be
+	  waiting in the store queue for a long time. That would cause
+	  significant problems for communication between multi-cores. Appending
+	  a fence w.o could immediately flush the store merge buffer and let
+	  other cores see the write result.
+
+	  This will apply the WRITE_ONCE errata to handle the non-standard
+	  behavior via appending a fence w.o instruction for WRITE_ONCE().
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index be84b14f0118..751eb5a7f614 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -69,6 +69,23 @@  static bool errata_probe_pmu(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_write_once(unsigned int stage,
+				    unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_WRITE_ONCE))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_BOOT ||
+	    stage == RISCV_ALTERNATIVES_MODULE)
+		return true;
+
+	return false;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -83,6 +100,9 @@  static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
 
+	if (errata_probe_write_once(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_WRITE_ONCE);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 712cab7adffe..fbb2b8d39321 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -11,19 +11,6 @@ 
 #include <asm/hwcap.h>
 #include <asm/vendorid_list.h>
 
-#ifdef CONFIG_ERRATA_SIFIVE
-#define	ERRATA_SIFIVE_CIP_453 0
-#define	ERRATA_SIFIVE_CIP_1200 1
-#define	ERRATA_SIFIVE_NUMBER 2
-#endif
-
-#ifdef CONFIG_ERRATA_THEAD
-#define	ERRATA_THEAD_PBMT 0
-#define	ERRATA_THEAD_CMO 1
-#define	ERRATA_THEAD_PMU 2
-#define	ERRATA_THEAD_NUMBER 3
-#endif
-
 #ifdef __ASSEMBLY__
 
 #define ALT_INSN_FAULT(x)						\
diff --git a/arch/riscv/include/asm/rwonce.h b/arch/riscv/include/asm/rwonce.h
new file mode 100644
index 000000000000..be0b8864969d
--- /dev/null
+++ b/arch/riscv/include/asm/rwonce.h
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+#include <asm/vendorid_list.h>
+
+#define __WRITE_ONCE(x, val)				\
+do {							\
+	*(volatile typeof(x) *)&(x) = (val);		\
+	asm volatile(ALTERNATIVE(			\
+		__nops(1),				\
+		"fence w, o\n\t",			\
+		THEAD_VENDOR_ID,			\
+		ERRATA_THEAD_WRITE_ONCE,		\
+		CONFIG_ERRATA_THEAD_WRITE_ONCE)		\
+		: : : "memory");			\
+} while (0)
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_RWONCE_H */
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index cb89af3f0704..73078cfe4029 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -8,4 +8,18 @@ 
 #define SIFIVE_VENDOR_ID	0x489
 #define THEAD_VENDOR_ID		0x5b7
 
+#ifdef CONFIG_ERRATA_SIFIVE
+#define	ERRATA_SIFIVE_CIP_453 0
+#define	ERRATA_SIFIVE_CIP_1200 1
+#define	ERRATA_SIFIVE_NUMBER 2
+#endif
+
+#ifdef CONFIG_ERRATA_THEAD
+#define	ERRATA_THEAD_PBMT 0
+#define	ERRATA_THEAD_CMO 1
+#define	ERRATA_THEAD_PMU 2
+#define	ERRATA_THEAD_WRITE_ONCE 3
+#define	ERRATA_THEAD_NUMBER 4
+#endif
+
 #endif
diff --git a/include/asm-generic/rwonce.h b/include/asm-generic/rwonce.h
index 8d0a6280e982..fb07fe8c6e45 100644
--- a/include/asm-generic/rwonce.h
+++ b/include/asm-generic/rwonce.h
@@ -50,10 +50,12 @@ 
 	__READ_ONCE(x);							\
 })
 
+#ifndef __WRITE_ONCE
 #define __WRITE_ONCE(x, val)						\
 do {									\
 	*(volatile typeof(x) *)&(x) = (val);				\
 } while (0)
+#endif
 
 #define WRITE_ONCE(x, val)						\
 do {									\