diff mbox series

[2/2] riscv: implement cache-management errata for T-Head SoCs

Message ID 20220307224620.1933061-3-heiko@sntech.de (mailing list archive)
State New, archived
Headers show
Series riscv: implement Zicbom-based CMO instructions + the t-head variant | expand

Commit Message

Heiko Stübner March 7, 2022, 10:46 p.m. UTC
The T-Head C906 and C910 implement a scheme for handling
cache operations different from the generic Zicbom extension.

Add an errata for it next to the generic dma coherency ops.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/riscv/Kconfig.erratas           | 10 +++++++
 arch/riscv/errata/thead/errata.c     |  5 ++++
 arch/riscv/include/asm/errata_list.h | 45 ++++++++++++++++++++++++++--
 3 files changed, 57 insertions(+), 3 deletions(-)

Comments

Palmer Dabbelt March 31, 2022, 2:30 a.m. UTC | #1
On Mon, 07 Mar 2022 14:46:20 PST (-0800), heiko@sntech.de wrote:
> The T-Head C906 and C910 implement a scheme for handling
> cache operations different from the generic Zicbom extension.
>
> Add an errata for it next to the generic dma coherency ops.
>
> Signed-off-by: Heiko Stuebner <heiko@sntech.de>
> ---
>  arch/riscv/Kconfig.erratas           | 10 +++++++
>  arch/riscv/errata/thead/errata.c     |  5 ++++
>  arch/riscv/include/asm/errata_list.h | 45 ++++++++++++++++++++++++++--
>  3 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
> index de4002baa1d0..89a6dcb8ac2a 100644
> --- a/arch/riscv/Kconfig.erratas
> +++ b/arch/riscv/Kconfig.erratas
> @@ -50,4 +50,14 @@ config ERRATA_THEAD_PBMT
>
>  	  If you don't know what to do here, say "Y".
>
> +config ERRATA_THEAD_CMO
> +	bool "Apply T-Head cache management errata"
> +	depends on ERRATA_THEAD && RISCV_DMA_NONCOHERENT
> +	default y
> +	help
> +	  This will apply the cache management errata to handle the
> +	  non-standard handling on non-coherent operations on T-Head SoCs.
> +
> +	  If you don't know what to do here, say "Y".
> +
>  endmenu
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index fd8e0538a3f0..11c26c37425f 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -33,6 +33,11 @@ static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
>  		.stage = RISCV_ALTERNATIVES_EARLY_BOOT,
>  		.check_func = errata_mt_check_func
>  	},
> +	{
> +		.name = "cache-management",
> +		.stage = RISCV_ALTERNATIVES_BOOT,
> +		.check_func = errata_mt_check_func
> +	},
>  };
>
>  static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 7a2dd61af24d..f7c6805daeab 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -16,7 +16,8 @@
>
>  #ifdef CONFIG_ERRATA_THEAD
>  #define	ERRATA_THEAD_PBMT 0
> -#define	ERRATA_THEAD_NUMBER 1
> +#define	ERRATA_THEAD_CMO 1
> +#define	ERRATA_THEAD_NUMBER 2
>  #endif
>
>  #define	CPUFEATURE_SVPBMT 0
> @@ -104,8 +105,37 @@ asm volatile(ALTERNATIVE(								\
>  #define CBO_CLEAN_A0	".long 0x25200F"
>  #define CBO_FLUSH_A0	".long 0x05200F"
>
> +/*
> + * dcache.ipa rs1 (invalidate, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01010      rs1       000      00000  0001011
> + * dache.iva rs1 (invalida, virtual address)
> + *   0000001    00110      rs1       000      00000  0001011
> + *
> + * dcache.cpa rs1 (clean, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01001      rs1       000      00000  0001011
> + * dcache.cva rs1 (clean, virtual address)
> + *   0000001    00100      rs1       000      00000  0001011
> + *
> + * dcache.cipa rs1 (clean then invalidate, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01011      rs1       000      00000  0001011
> + * dcache.civa rs1 (... virtual address)
> + *   0000001    00111      rs1       000      00000  0001011
> + *
> + * sync.s (make sure all cache operations finished)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000000    11001     00000      000      00000  0001011
> + */
> +#define THEAD_INVAL_A0	".long 0x0265000b"
> +#define THEAD_CLEAN_A0	".long 0x0245000b"
> +#define THEAD_FLUSH_A0	".long 0x0275000b"
> +#define THEAD_SYNC_S	".long 0x0190000b"

IIRC this came up before, but these really need to get into the 
assembler as actual instructions.

> +
>  #define ALT_CMO_OP(_op, _start, _size)							\
> -asm volatile(ALTERNATIVE(								\
> +asm volatile(ALTERNATIVE_2(								\
> +	"nop\n\t"									\
>  	"nop\n\t"									\
>  	"nop\n\t"									\
>  	"nop\n\t"									\
> @@ -117,7 +147,16 @@ asm volatile(ALTERNATIVE(								\
>  	CBO_##_op##_A0 "\n\t"								\
>  	"addi a0, a0, %0\n\t"								\
>  	"2:\n\t"									\
> -	"bltu a0, %2, 3b\n\t", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT)		\
> +	"bltu a0, %2, 3b\n\t"								\
> +	"nop", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT,				\
> +	"mv a0, %1\n\t"									\
> +	"j 2f\n\t"									\
> +	"3:\n\t"									\
> +	THEAD_##_op##_A0 "\n\t"								\
> +	"addi a0, a0, %0\n\t"								\
> +	"2:\n\t"									\
> +	"bltu a0, %2, 3b\n\t"								\
> +	THEAD_SYNC_S, THEAD_VENDOR_ID, ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
>  	: : "I"(L1_CACHE_BYTES), "r"((_start) & ~(L1_CACHE_BYTES - 1)),			\
>  	    "r"(ALIGN((_start) + (_size), L1_CACHE_BYTES)))
Heiko Stübner March 31, 2022, 8:22 a.m. UTC | #2
Hi Palmer,

Am Donnerstag, 31. März 2022, 04:30:36 CEST schrieb Palmer Dabbelt:
> On Mon, 07 Mar 2022 14:46:20 PST (-0800), heiko@sntech.de wrote:
> > The T-Head C906 and C910 implement a scheme for handling
> > cache operations different from the generic Zicbom extension.
> >
> > Add an errata for it next to the generic dma coherency ops.
> >
> > Signed-off-by: Heiko Stuebner <heiko@sntech.de>
> > ---
> >  arch/riscv/Kconfig.erratas           | 10 +++++++
> >  arch/riscv/errata/thead/errata.c     |  5 ++++
> >  arch/riscv/include/asm/errata_list.h | 45 ++++++++++++++++++++++++++--
> >  3 files changed, 57 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
> > index de4002baa1d0..89a6dcb8ac2a 100644
> > --- a/arch/riscv/Kconfig.erratas
> > +++ b/arch/riscv/Kconfig.erratas
> > @@ -50,4 +50,14 @@ config ERRATA_THEAD_PBMT
> >
> >  	  If you don't know what to do here, say "Y".
> >
> > +config ERRATA_THEAD_CMO
> > +	bool "Apply T-Head cache management errata"
> > +	depends on ERRATA_THEAD && RISCV_DMA_NONCOHERENT
> > +	default y
> > +	help
> > +	  This will apply the cache management errata to handle the
> > +	  non-standard handling on non-coherent operations on T-Head SoCs.
> > +
> > +	  If you don't know what to do here, say "Y".
> > +
> >  endmenu
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index fd8e0538a3f0..11c26c37425f 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -33,6 +33,11 @@ static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
> >  		.stage = RISCV_ALTERNATIVES_EARLY_BOOT,
> >  		.check_func = errata_mt_check_func
> >  	},
> > +	{
> > +		.name = "cache-management",
> > +		.stage = RISCV_ALTERNATIVES_BOOT,
> > +		.check_func = errata_mt_check_func
> > +	},
> >  };
> >
> >  static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index 7a2dd61af24d..f7c6805daeab 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -16,7 +16,8 @@
> >
> >  #ifdef CONFIG_ERRATA_THEAD
> >  #define	ERRATA_THEAD_PBMT 0
> > -#define	ERRATA_THEAD_NUMBER 1
> > +#define	ERRATA_THEAD_CMO 1
> > +#define	ERRATA_THEAD_NUMBER 2
> >  #endif
> >
> >  #define	CPUFEATURE_SVPBMT 0
> > @@ -104,8 +105,37 @@ asm volatile(ALTERNATIVE(								\
> >  #define CBO_CLEAN_A0	".long 0x25200F"
> >  #define CBO_FLUSH_A0	".long 0x05200F"
> >
> > +/*
> > + * dcache.ipa rs1 (invalidate, physical address)
> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > + *   0000001    01010      rs1       000      00000  0001011
> > + * dache.iva rs1 (invalida, virtual address)
> > + *   0000001    00110      rs1       000      00000  0001011
> > + *
> > + * dcache.cpa rs1 (clean, physical address)
> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > + *   0000001    01001      rs1       000      00000  0001011
> > + * dcache.cva rs1 (clean, virtual address)
> > + *   0000001    00100      rs1       000      00000  0001011
> > + *
> > + * dcache.cipa rs1 (clean then invalidate, physical address)
> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > + *   0000001    01011      rs1       000      00000  0001011
> > + * dcache.civa rs1 (... virtual address)
> > + *   0000001    00111      rs1       000      00000  0001011
> > + *
> > + * sync.s (make sure all cache operations finished)
> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > + *   0000000    11001     00000      000      00000  0001011
> > + */
> > +#define THEAD_INVAL_A0	".long 0x0265000b"
> > +#define THEAD_CLEAN_A0	".long 0x0245000b"
> > +#define THEAD_FLUSH_A0	".long 0x0275000b"
> > +#define THEAD_SYNC_S	".long 0x0190000b"
> 
> IIRC this came up before, but these really need to get into the 
> assembler as actual instructions.

okay :-) .

But just for my understanding, which of the two ways should we take going forward:
- keep this in the waiting area _until_ a suitable binutils is released
- use the coded instructions now and convert later once binutils is released

The reason I ask is that any chip with a T-Head core, like the Allwinner D1,
will need this for things like basic networking, so with the binutils
release schedule, I guess we'd be looking at autumn 2022 at the earliest.


Thanks
Heiko

> > +
> >  #define ALT_CMO_OP(_op, _start, _size)							\
> > -asm volatile(ALTERNATIVE(								\
> > +asm volatile(ALTERNATIVE_2(								\
> > +	"nop\n\t"									\
> >  	"nop\n\t"									\
> >  	"nop\n\t"									\
> >  	"nop\n\t"									\
> > @@ -117,7 +147,16 @@ asm volatile(ALTERNATIVE(								\
> >  	CBO_##_op##_A0 "\n\t"								\
> >  	"addi a0, a0, %0\n\t"								\
> >  	"2:\n\t"									\
> > -	"bltu a0, %2, 3b\n\t", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT)		\
> > +	"bltu a0, %2, 3b\n\t"								\
> > +	"nop", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT,				\
> > +	"mv a0, %1\n\t"									\
> > +	"j 2f\n\t"									\
> > +	"3:\n\t"									\
> > +	THEAD_##_op##_A0 "\n\t"								\
> > +	"addi a0, a0, %0\n\t"								\
> > +	"2:\n\t"									\
> > +	"bltu a0, %2, 3b\n\t"								\
> > +	THEAD_SYNC_S, THEAD_VENDOR_ID, ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
> >  	: : "I"(L1_CACHE_BYTES), "r"((_start) & ~(L1_CACHE_BYTES - 1)),			\
> >  	    "r"(ALIGN((_start) + (_size), L1_CACHE_BYTES)))
>
Philipp Tomsich March 31, 2022, 8:29 a.m. UTC | #3
Palmer,

Could you confirm that I correctly understood what you require: is it
that a patch is on the binutils list?

Philipp.


On Thu, 31 Mar 2022 at 10:22, Heiko Stübner <heiko@sntech.de> wrote:
>
> Hi Palmer,
>
> Am Donnerstag, 31. März 2022, 04:30:36 CEST schrieb Palmer Dabbelt:
> > On Mon, 07 Mar 2022 14:46:20 PST (-0800), heiko@sntech.de wrote:
> > > The T-Head C906 and C910 implement a scheme for handling
> > > cache operations different from the generic Zicbom extension.
> > >
> > > Add an errata for it next to the generic dma coherency ops.
> > >
> > > Signed-off-by: Heiko Stuebner <heiko@sntech.de>
> > > ---
> > >  arch/riscv/Kconfig.erratas           | 10 +++++++
> > >  arch/riscv/errata/thead/errata.c     |  5 ++++
> > >  arch/riscv/include/asm/errata_list.h | 45 ++++++++++++++++++++++++++--
> > >  3 files changed, 57 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
> > > index de4002baa1d0..89a6dcb8ac2a 100644
> > > --- a/arch/riscv/Kconfig.erratas
> > > +++ b/arch/riscv/Kconfig.erratas
> > > @@ -50,4 +50,14 @@ config ERRATA_THEAD_PBMT
> > >
> > >       If you don't know what to do here, say "Y".
> > >
> > > +config ERRATA_THEAD_CMO
> > > +   bool "Apply T-Head cache management errata"
> > > +   depends on ERRATA_THEAD && RISCV_DMA_NONCOHERENT
> > > +   default y
> > > +   help
> > > +     This will apply the cache management errata to handle the
> > > +     non-standard handling on non-coherent operations on T-Head SoCs.
> > > +
> > > +     If you don't know what to do here, say "Y".
> > > +
> > >  endmenu
> > > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > > index fd8e0538a3f0..11c26c37425f 100644
> > > --- a/arch/riscv/errata/thead/errata.c
> > > +++ b/arch/riscv/errata/thead/errata.c
> > > @@ -33,6 +33,11 @@ static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
> > >             .stage = RISCV_ALTERNATIVES_EARLY_BOOT,
> > >             .check_func = errata_mt_check_func
> > >     },
> > > +   {
> > > +           .name = "cache-management",
> > > +           .stage = RISCV_ALTERNATIVES_BOOT,
> > > +           .check_func = errata_mt_check_func
> > > +   },
> > >  };
> > >
> > >  static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
> > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > > index 7a2dd61af24d..f7c6805daeab 100644
> > > --- a/arch/riscv/include/asm/errata_list.h
> > > +++ b/arch/riscv/include/asm/errata_list.h
> > > @@ -16,7 +16,8 @@
> > >
> > >  #ifdef CONFIG_ERRATA_THEAD
> > >  #define    ERRATA_THEAD_PBMT 0
> > > -#define    ERRATA_THEAD_NUMBER 1
> > > +#define    ERRATA_THEAD_CMO 1
> > > +#define    ERRATA_THEAD_NUMBER 2
> > >  #endif
> > >
> > >  #define    CPUFEATURE_SVPBMT 0
> > > @@ -104,8 +105,37 @@ asm volatile(ALTERNATIVE(                                                              \
> > >  #define CBO_CLEAN_A0       ".long 0x25200F"
> > >  #define CBO_FLUSH_A0       ".long 0x05200F"
> > >
> > > +/*
> > > + * dcache.ipa rs1 (invalidate, physical address)
> > > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > > + *   0000001    01010      rs1       000      00000  0001011
> > > + * dache.iva rs1 (invalida, virtual address)
> > > + *   0000001    00110      rs1       000      00000  0001011
> > > + *
> > > + * dcache.cpa rs1 (clean, physical address)
> > > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > > + *   0000001    01001      rs1       000      00000  0001011
> > > + * dcache.cva rs1 (clean, virtual address)
> > > + *   0000001    00100      rs1       000      00000  0001011
> > > + *
> > > + * dcache.cipa rs1 (clean then invalidate, physical address)
> > > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > > + *   0000001    01011      rs1       000      00000  0001011
> > > + * dcache.civa rs1 (... virtual address)
> > > + *   0000001    00111      rs1       000      00000  0001011
> > > + *
> > > + * sync.s (make sure all cache operations finished)
> > > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> > > + *   0000000    11001     00000      000      00000  0001011
> > > + */
> > > +#define THEAD_INVAL_A0     ".long 0x0265000b"
> > > +#define THEAD_CLEAN_A0     ".long 0x0245000b"
> > > +#define THEAD_FLUSH_A0     ".long 0x0275000b"
> > > +#define THEAD_SYNC_S       ".long 0x0190000b"
> >
> > IIRC this came up before, but these really need to get into the
> > assembler as actual instructions.
>
> okay :-) .
>
> But just for my understanding which of the two ways going forward:
> - keep this in the waiting area _until_ a suitable binutils is released
> - use the coded instructions now and convert later once binutils is released
>
> The reason I ask is, that any chip with a t-head core like the Allwinner-D1
> will need this for things like basic networking, so with the binutils
> release schedule, I guess we'd be looking at autumn 2022 at the earliest.
>
>
> Thanks
> Heiko
>
> > > +
> > >  #define ALT_CMO_OP(_op, _start, _size)                                                     \
> > > -asm volatile(ALTERNATIVE(                                                          \
> > > +asm volatile(ALTERNATIVE_2(                                                                \
> > > +   "nop\n\t"                                                                       \
> > >     "nop\n\t"                                                                       \
> > >     "nop\n\t"                                                                       \
> > >     "nop\n\t"                                                                       \
> > > @@ -117,7 +147,16 @@ asm volatile(ALTERNATIVE(                                                              \
> > >     CBO_##_op##_A0 "\n\t"                                                           \
> > >     "addi a0, a0, %0\n\t"                                                           \
> > >     "2:\n\t"                                                                        \
> > > -   "bltu a0, %2, 3b\n\t", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT)         \
> > > +   "bltu a0, %2, 3b\n\t"                                                           \
> > > +   "nop", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT,                         \
> > > +   "mv a0, %1\n\t"                                                                 \
> > > +   "j 2f\n\t"                                                                      \
> > > +   "3:\n\t"                                                                        \
> > > +   THEAD_##_op##_A0 "\n\t"                                                         \
> > > +   "addi a0, a0, %0\n\t"                                                           \
> > > +   "2:\n\t"                                                                        \
> > > +   "bltu a0, %2, 3b\n\t"                                                           \
> > > +   THEAD_SYNC_S, THEAD_VENDOR_ID, ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)       \
> > >     : : "I"(L1_CACHE_BYTES), "r"((_start) & ~(L1_CACHE_BYTES - 1)),                 \
> > >         "r"(ALIGN((_start) + (_size), L1_CACHE_BYTES)))
> >
>
>
>
>
Samuel Holland April 1, 2022, 1:05 a.m. UTC | #4
On 3/7/22 4:46 PM, Heiko Stuebner wrote:
> The T-Head C906 and C910 implement a scheme for handling
> cache operations different from the generic Zicbom extension.
> 
> Add an errata for it next to the generic dma coherency ops.
> 
> Signed-off-by: Heiko Stuebner <heiko@sntech.de>

Tested-by: Samuel Holland <samuel@sholland.org>

With this option disabled, MMC and USB are broken on D1 boards:

[    3.021326] Waiting for root device /dev/mmcblk0p1...
[    3.219727] usb 4-1: new full-speed USB device number 2 using ohci-platform
[   18.703736] usb 4-1: device descriptor read/64, error -110

With the option enabled, MMC, USB, and Ethernet all work fine.

> ---
>  arch/riscv/Kconfig.erratas           | 10 +++++++
>  arch/riscv/errata/thead/errata.c     |  5 ++++
>  arch/riscv/include/asm/errata_list.h | 45 ++++++++++++++++++++++++++--
>  3 files changed, 57 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
> index de4002baa1d0..89a6dcb8ac2a 100644
> --- a/arch/riscv/Kconfig.erratas
> +++ b/arch/riscv/Kconfig.erratas
> @@ -50,4 +50,14 @@ config ERRATA_THEAD_PBMT
>  
>  	  If you don't know what to do here, say "Y".
>  
> +config ERRATA_THEAD_CMO
> +	bool "Apply T-Head cache management errata"
> +	depends on ERRATA_THEAD && RISCV_DMA_NONCOHERENT
> +	default y
> +	help
> +	  This will apply the cache management errata to handle the
> +	  non-standard handling on non-coherent operations on T-Head SoCs.
> +
> +	  If you don't know what to do here, say "Y".
> +
>  endmenu
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index fd8e0538a3f0..11c26c37425f 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -33,6 +33,11 @@ static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
>  		.stage = RISCV_ALTERNATIVES_EARLY_BOOT,
>  		.check_func = errata_mt_check_func
>  	},
> +	{
> +		.name = "cache-management",
> +		.stage = RISCV_ALTERNATIVES_BOOT,
> +		.check_func = errata_mt_check_func
> +	},
>  };
>  
>  static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 7a2dd61af24d..f7c6805daeab 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -16,7 +16,8 @@
>  
>  #ifdef CONFIG_ERRATA_THEAD
>  #define	ERRATA_THEAD_PBMT 0
> -#define	ERRATA_THEAD_NUMBER 1
> +#define	ERRATA_THEAD_CMO 1
> +#define	ERRATA_THEAD_NUMBER 2
>  #endif
>  
>  #define	CPUFEATURE_SVPBMT 0
> @@ -104,8 +105,37 @@ asm volatile(ALTERNATIVE(								\
>  #define CBO_CLEAN_A0	".long 0x25200F"
>  #define CBO_FLUSH_A0	".long 0x05200F"
>  
> +/*
> + * dcache.ipa rs1 (invalidate, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01010      rs1       000      00000  0001011
> + * dache.iva rs1 (invalida, virtual address)
> + *   0000001    00110      rs1       000      00000  0001011
> + *
> + * dcache.cpa rs1 (clean, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01001      rs1       000      00000  0001011
> + * dcache.cva rs1 (clean, virtual address)
> + *   0000001    00100      rs1       000      00000  0001011
> + *
> + * dcache.cipa rs1 (clean then invalidate, physical address)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000001    01011      rs1       000      00000  0001011
> + * dcache.civa rs1 (... virtual address)
> + *   0000001    00111      rs1       000      00000  0001011
> + *
> + * sync.s (make sure all cache operations finished)
> + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
> + *   0000000    11001     00000      000      00000  0001011
> + */
> +#define THEAD_INVAL_A0	".long 0x0265000b"
> +#define THEAD_CLEAN_A0	".long 0x0245000b"
> +#define THEAD_FLUSH_A0	".long 0x0275000b"
> +#define THEAD_SYNC_S	".long 0x0190000b"
> +
>  #define ALT_CMO_OP(_op, _start, _size)							\
> -asm volatile(ALTERNATIVE(								\
> +asm volatile(ALTERNATIVE_2(								\
> +	"nop\n\t"									\
>  	"nop\n\t"									\
>  	"nop\n\t"									\
>  	"nop\n\t"									\
> @@ -117,7 +147,16 @@ asm volatile(ALTERNATIVE(								\
>  	CBO_##_op##_A0 "\n\t"								\
>  	"addi a0, a0, %0\n\t"								\
>  	"2:\n\t"									\
> -	"bltu a0, %2, 3b\n\t", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT)		\
> +	"bltu a0, %2, 3b\n\t"								\
> +	"nop", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT,				\
> +	"mv a0, %1\n\t"									\
> +	"j 2f\n\t"									\
> +	"3:\n\t"									\
> +	THEAD_##_op##_A0 "\n\t"								\
> +	"addi a0, a0, %0\n\t"								\
> +	"2:\n\t"									\
> +	"bltu a0, %2, 3b\n\t"								\
> +	THEAD_SYNC_S, THEAD_VENDOR_ID, ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
>  	: : "I"(L1_CACHE_BYTES), "r"((_start) & ~(L1_CACHE_BYTES - 1)),			\
>  	    "r"(ALIGN((_start) + (_size), L1_CACHE_BYTES)))
>  
>
Palmer Dabbelt April 20, 2022, 12:18 a.m. UTC | #5
On Thu, 31 Mar 2022 01:22:29 PDT (-0700), heiko@sntech.de wrote:
> Hi Palmer,
>
> Am Donnerstag, 31. März 2022, 04:30:36 CEST schrieb Palmer Dabbelt:
>> On Mon, 07 Mar 2022 14:46:20 PST (-0800), heiko@sntech.de wrote:
>> > The T-Head C906 and C910 implement a scheme for handling
>> > cache operations different from the generic Zicbom extension.
>> >
>> > Add an errata for it next to the generic dma coherency ops.
>> >
>> > Signed-off-by: Heiko Stuebner <heiko@sntech.de>
>> > ---
>> >  arch/riscv/Kconfig.erratas           | 10 +++++++
>> >  arch/riscv/errata/thead/errata.c     |  5 ++++
>> >  arch/riscv/include/asm/errata_list.h | 45 ++++++++++++++++++++++++++--
>> >  3 files changed, 57 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
>> > index de4002baa1d0..89a6dcb8ac2a 100644
>> > --- a/arch/riscv/Kconfig.erratas
>> > +++ b/arch/riscv/Kconfig.erratas
>> > @@ -50,4 +50,14 @@ config ERRATA_THEAD_PBMT
>> >
>> >  	  If you don't know what to do here, say "Y".
>> >
>> > +config ERRATA_THEAD_CMO
>> > +	bool "Apply T-Head cache management errata"
>> > +	depends on ERRATA_THEAD && RISCV_DMA_NONCOHERENT
>> > +	default y
>> > +	help
>> > +	  This will apply the cache management errata to handle the
>> > +	  non-standard handling on non-coherent operations on T-Head SoCs.
>> > +
>> > +	  If you don't know what to do here, say "Y".
>> > +
>> >  endmenu
>> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
>> > index fd8e0538a3f0..11c26c37425f 100644
>> > --- a/arch/riscv/errata/thead/errata.c
>> > +++ b/arch/riscv/errata/thead/errata.c
>> > @@ -33,6 +33,11 @@ static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
>> >  		.stage = RISCV_ALTERNATIVES_EARLY_BOOT,
>> >  		.check_func = errata_mt_check_func
>> >  	},
>> > +	{
>> > +		.name = "cache-management",
>> > +		.stage = RISCV_ALTERNATIVES_BOOT,
>> > +		.check_func = errata_mt_check_func
>> > +	},
>> >  };
>> >
>> >  static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
>> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
>> > index 7a2dd61af24d..f7c6805daeab 100644
>> > --- a/arch/riscv/include/asm/errata_list.h
>> > +++ b/arch/riscv/include/asm/errata_list.h
>> > @@ -16,7 +16,8 @@
>> >
>> >  #ifdef CONFIG_ERRATA_THEAD
>> >  #define	ERRATA_THEAD_PBMT 0
>> > -#define	ERRATA_THEAD_NUMBER 1
>> > +#define	ERRATA_THEAD_CMO 1
>> > +#define	ERRATA_THEAD_NUMBER 2
>> >  #endif
>> >
>> >  #define	CPUFEATURE_SVPBMT 0
>> > @@ -104,8 +105,37 @@ asm volatile(ALTERNATIVE(								\
>> >  #define CBO_CLEAN_A0	".long 0x25200F"
>> >  #define CBO_FLUSH_A0	".long 0x05200F"
>> >
>> > +/*
>> > + * dcache.ipa rs1 (invalidate, physical address)
>> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
>> > + *   0000001    01010      rs1       000      00000  0001011
>> > + * dache.iva rs1 (invalida, virtual address)
>> > + *   0000001    00110      rs1       000      00000  0001011
>> > + *
>> > + * dcache.cpa rs1 (clean, physical address)
>> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
>> > + *   0000001    01001      rs1       000      00000  0001011
>> > + * dcache.cva rs1 (clean, virtual address)
>> > + *   0000001    00100      rs1       000      00000  0001011
>> > + *
>> > + * dcache.cipa rs1 (clean then invalidate, physical address)
>> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
>> > + *   0000001    01011      rs1       000      00000  0001011
>> > + * dcache.civa rs1 (... virtual address)
>> > + *   0000001    00111      rs1       000      00000  0001011
>> > + *
>> > + * sync.s (make sure all cache operations finished)
>> > + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
>> > + *   0000000    11001     00000      000      00000  0001011
>> > + */
>> > +#define THEAD_INVAL_A0	".long 0x0265000b"
>> > +#define THEAD_CLEAN_A0	".long 0x0245000b"
>> > +#define THEAD_FLUSH_A0	".long 0x0275000b"
>> > +#define THEAD_SYNC_S	".long 0x0190000b"
>> 
>> IIRC this came up before, but these really need to get into the 
>> assembler as actual instructions.
>
> okay :-) .
>
> But just for my understanding which of the two ways going forward:
> - keep this in the waiting area _until_ a suitable binutils is released
> - use the coded instructions now and convert later once binutils is released
>
> The reason I ask is, that any chip with a t-head core like the Allwinner-D1
> will need this for things like basic networking, so with the binutils
> release schedule, I guess we'd be looking at autumn 2022 at the earliest.

I'm not the binutils release maintainer, so I can't really sign off on a 
release date, but given the history that sounds about right to me.  I get 
it's a headache to have to have a toolchain that supports the ISA, but 
if it was really that important it would have made one of the last two 
releases -- I very specifically remember talking to the folks at the 
RISC-V foundation about this the better part of a year ago, but they 
decided to play at politics instead of being constructive so now we have 
two messes to clean up.

I volunteered Patrick to send binutils patches for the T-Head cache 
control stuff (as I didn't have time to write it myself this weekend), 
it's only a dozen or so instructions and thus shouldn't take that long.  
At least that way we can get a rough consensus on how we're going to 
move forward with the toolchain support, which we really need before 
we're going to start depending on anything.

Sorry you got pulled into all this. 

> Thanks
> Heiko
>
>> > +
>> >  #define ALT_CMO_OP(_op, _start, _size)							\
>> > -asm volatile(ALTERNATIVE(								\
>> > +asm volatile(ALTERNATIVE_2(								\
>> > +	"nop\n\t"									\
>> >  	"nop\n\t"									\
>> >  	"nop\n\t"									\
>> >  	"nop\n\t"									\
>> > @@ -117,7 +147,16 @@ asm volatile(ALTERNATIVE(								\
>> >  	CBO_##_op##_A0 "\n\t"								\
>> >  	"addi a0, a0, %0\n\t"								\
>> >  	"2:\n\t"									\
>> > -	"bltu a0, %2, 3b\n\t", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT)		\
>> > +	"bltu a0, %2, 3b\n\t"								\
>> > +	"nop", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT,				\
>> > +	"mv a0, %1\n\t"									\
>> > +	"j 2f\n\t"									\
>> > +	"3:\n\t"									\
>> > +	THEAD_##_op##_A0 "\n\t"								\
>> > +	"addi a0, a0, %0\n\t"								\
>> > +	"2:\n\t"									\
>> > +	"bltu a0, %2, 3b\n\t"								\
>> > +	THEAD_SYNC_S, THEAD_VENDOR_ID, ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
>> >  	: : "I"(L1_CACHE_BYTES), "r"((_start) & ~(L1_CACHE_BYTES - 1)),			\
>> >  	    "r"(ALIGN((_start) + (_size), L1_CACHE_BYTES)))
>>
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index de4002baa1d0..89a6dcb8ac2a 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -50,4 +50,14 @@  config ERRATA_THEAD_PBMT
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_CMO
+	bool "Apply T-Head cache management errata"
+	depends on ERRATA_THEAD && RISCV_DMA_NONCOHERENT
+	default y
+	help
+	  This will apply the cache management errata to handle the
+	  non-standard handling on non-coherent operations on T-Head SoCs.
+
+	  If you don't know what to do here, say "Y".
+
 endmenu
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index fd8e0538a3f0..11c26c37425f 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -33,6 +33,11 @@  static const struct errata_info errata_list[ERRATA_THEAD_NUMBER] = {
 		.stage = RISCV_ALTERNATIVES_EARLY_BOOT,
 		.check_func = errata_mt_check_func
 	},
+	{
+		.name = "cache-management",
+		.stage = RISCV_ALTERNATIVES_BOOT,
+		.check_func = errata_mt_check_func
+	},
 };
 
 static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid)
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 7a2dd61af24d..f7c6805daeab 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -16,7 +16,8 @@ 
 
 #ifdef CONFIG_ERRATA_THEAD
 #define	ERRATA_THEAD_PBMT 0
-#define	ERRATA_THEAD_NUMBER 1
+#define	ERRATA_THEAD_CMO 1
+#define	ERRATA_THEAD_NUMBER 2
 #endif
 
 #define	CPUFEATURE_SVPBMT 0
@@ -104,8 +105,37 @@  asm volatile(ALTERNATIVE(								\
 #define CBO_CLEAN_A0	".long 0x25200F"
 #define CBO_FLUSH_A0	".long 0x05200F"
 
+/*
+ * dcache.ipa rs1 (invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01010      rs1       000      00000  0001011
+ * dcache.iva rs1 (invalidate, virtual address)
+ *   0000001    00110      rs1       000      00000  0001011
+ *
+ * dcache.cpa rs1 (clean, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01001      rs1       000      00000  0001011
+ * dcache.cva rs1 (clean, virtual address)
+ *   0000001    00100      rs1       000      00000  0001011
+ *
+ * dcache.cipa rs1 (clean then invalidate, physical address)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000001    01011      rs1       000      00000  0001011
+ * dcache.civa rs1 (... virtual address)
+ *   0000001    00111      rs1       000      00000  0001011
+ *
+ * sync.s (make sure all cache operations finished)
+ * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
+ *   0000000    11001     00000      000      00000  0001011
+ */
+#define THEAD_INVAL_A0	".long 0x0265000b"
+#define THEAD_CLEAN_A0	".long 0x0245000b"
+#define THEAD_FLUSH_A0	".long 0x0275000b"
+#define THEAD_SYNC_S	".long 0x0190000b"
+
 #define ALT_CMO_OP(_op, _start, _size)							\
-asm volatile(ALTERNATIVE(								\
+asm volatile(ALTERNATIVE_2(								\
+	"nop\n\t"									\
 	"nop\n\t"									\
 	"nop\n\t"									\
 	"nop\n\t"									\
@@ -117,7 +147,16 @@  asm volatile(ALTERNATIVE(								\
 	CBO_##_op##_A0 "\n\t"								\
 	"addi a0, a0, %0\n\t"								\
 	"2:\n\t"									\
-	"bltu a0, %2, 3b\n\t", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT)		\
+	"bltu a0, %2, 3b\n\t"								\
+	"nop", 0, CPUFEATURE_CMO, CONFIG_RISCV_DMA_NONCOHERENT,				\
+	"mv a0, %1\n\t"									\
+	"j 2f\n\t"									\
+	"3:\n\t"									\
+	THEAD_##_op##_A0 "\n\t"								\
+	"addi a0, a0, %0\n\t"								\
+	"2:\n\t"									\
+	"bltu a0, %2, 3b\n\t"								\
+	THEAD_SYNC_S, THEAD_VENDOR_ID, ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO)	\
 	: : "I"(L1_CACHE_BYTES), "r"((_start) & ~(L1_CACHE_BYTES - 1)),			\
 	    "r"(ALIGN((_start) + (_size), L1_CACHE_BYTES)))