diff mbox series

[V2] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR

Message ID 20220304014301.2515-1-lcherian@marvell.com (mailing list archive)
State New, archived
Headers show
Series [V2] irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR | expand

Commit Message

Linu Cherian March 4, 2022, 1:43 a.m. UTC
When a IAR register read races with a GIC interrupt RELEASE event,
GIC-CPU interface could wrongly return a valid INTID to the CPU
for an interrupt that is already released(non activated) instead of 0x3ff.

As a side effect, an interrupt handler could run twice, once with
interrupt priority and then with idle priority.

As a workaround, gic_read_iar is updated so that it will return a
valid interrupt ID only if there is a change in the active priority list
after the IAR read on all the affected Silicons.

Since there are silicon variants where both 23154 and 38545 are applicable,
workaround for erratum 23154 has been extended to address both of them.

Signed-off-by: Linu Cherian <lcherian@marvell.com>
---
Changes since V2:
- IIDR based quirk management done for 23154 has been reverted
- Extended existing 23154 errata to address 38545 as well,
  so that existing static keys are reused. 
- Added MIDR based support macros to cover all the affected parts
- Changed the unlikely construct to likely construct in the workaround
  function.



 Documentation/arm64/silicon-errata.rst |  2 +-
 arch/arm64/Kconfig                     |  8 ++++++--
 arch/arm64/include/asm/arch_gicv3.h    | 22 ++++++++++++++++++++--
 arch/arm64/include/asm/cputype.h       |  2 ++
 arch/arm64/kernel/cpu_errata.c         | 25 ++++++++++++++++++++++---
 5 files changed, 51 insertions(+), 8 deletions(-)

Comments

Marc Zyngier March 4, 2022, 7:43 a.m. UTC | #1
On Fri, 04 Mar 2022 01:43:01 +0000,
Linu Cherian <lcherian@marvell.com> wrote:
> 
> When a IAR register read races with a GIC interrupt RELEASE event,
> GIC-CPU interface could wrongly return a valid INTID to the CPU
> for an interrupt that is already released(non activated) instead of 0x3ff.
> 
> As a side effect, an interrupt handler could run twice, once with
> interrupt priority and then with idle priority.
> 
> As a workaround, gic_read_iar is updated so that it will return a
> valid interrupt ID only if there is a change in the active priority list
> after the IAR read on all the affected Silicons.
> 
> Since there are silicon variants where both 23154 and 38545 are applicable,
> workaround for erratum 23154 has been extended to address both of them.
> 
> Signed-off-by: Linu Cherian <lcherian@marvell.com>
> ---
> Changes since V2:
> - IIDR based quirk management done for 23154 has been reverted
> - Extended existing 23154 errata to address 38545 as well,
>   so that existing static keys are reused. 
> - Added MIDR based support macros to cover all the affected parts
> - Changed the unlikely construct to likely construct in the workaround
>   function.
> 
> 
> 
>  Documentation/arm64/silicon-errata.rst |  2 +-
>  arch/arm64/Kconfig                     |  8 ++++++--
>  arch/arm64/include/asm/arch_gicv3.h    | 22 ++++++++++++++++++++--
>  arch/arm64/include/asm/cputype.h       |  2 ++
>  arch/arm64/kernel/cpu_errata.c         | 25 ++++++++++++++++++++++---
>  5 files changed, 51 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index ea281dd75517..466cb9e89047 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -136,7 +136,7 @@ stable kernels.
>  +----------------+-----------------+-----------------+-----------------------------+
>  | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
>  +----------------+-----------------+-----------------+-----------------------------+
> -| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
> +| Cavium         | ThunderX GICv3  | #23154,38545    | CAVIUM_ERRATUM_23154        |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
>  +----------------+-----------------+-----------------+-----------------------------+
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 09b885cc4db5..778cc2e22c21 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -891,13 +891,17 @@ config CAVIUM_ERRATUM_23144
>  	  If unsure, say Y.
>  
>  config CAVIUM_ERRATUM_23154
> -	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
> +	bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
>  	default y
>  	help
> -	  The gicv3 of ThunderX requires a modified version for
> +	  The ThunderX GICv3 implementation requires a modified version for
>  	  reading the IAR status to ensure data synchronization
>  	  (access to icc_iar1_el1 is not sync'ed before and after).
>  
> +	  It also suffers from erratum 38545 (also present on Marvell's
> +	  OcteonTX and OcteonTX2), resulting in deactivated interrupts being
> +	  spuriously presented to the CPU interface.
> +
>  	  If unsure, say Y.
>  
>  config CAVIUM_ERRATUM_27456
> diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
> index 4ad22c3135db..b8fd7b1f9944 100644
> --- a/arch/arm64/include/asm/arch_gicv3.h
> +++ b/arch/arm64/include/asm/arch_gicv3.h
> @@ -53,17 +53,35 @@ static inline u64 gic_read_iar_common(void)
>   * The gicv3 of ThunderX requires a modified version for reading the
>   * IAR status to ensure data synchronization (access to icc_iar1_el1
>   * is not sync'ed before and after).
> + *
> + * Erratum 38545
> + *
> + * When a IAR register read races with a GIC interrupt RELEASE event,
> + * GIC-CPU interface could wrongly return a valid INTID to the CPU
> + * for an interrupt that is already released(non activated) instead of 0x3ff.
> + *
> + * To workaround this, return a valid interrupt ID only if there is a change
> + * in the active priority list after the IAR read.
> + *
> + * Common function used for both the workarounds since,
> + * 1. On Thunderx 88xx 1.x both erratas are applicable.
> + * 2. Having extra nops doesn't add any side effects for Silicons where
> + *    erratum 23154 is not applicable.
>   */
>  static inline u64 gic_read_iar_cavium_thunderx(void)
>  {
> -	u64 irqstat;
> +	u64 irqstat, apr;
>  
> +	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);

Why only AP1R0? Does the HW only support 5 bits of priority? If it
supports more, you need to check all the registers that may contain an
active priority (0xa0 for a standard interrupt, 0x20 for a pNMI).

>  	nops(8);
>  	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
>  	nops(4);
>  	mb();
>  
> -	return irqstat;
> +	if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> +		return irqstat;
> +
> +	return 0x3ff;

This should be ICC_IAR1_EL1_SPURIOUS.

>  }
>  
>  static inline void gic_write_ctlr(u32 val)
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 999b9149f856..9407c5074a4f 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -80,6 +80,7 @@
>  
>  #define APM_CPU_PART_POTENZA		0x000
>  
> +#define CAVIUM_CPU_PART_THUNDERX_OTX_GEN 0x0A0

Is this an actual part number? What does 'GEN' stand for?

>  #define CAVIUM_CPU_PART_THUNDERX	0x0A1
>  #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2
>  #define CAVIUM_CPU_PART_THUNDERX_83XX	0x0A3
> @@ -121,6 +122,7 @@
>  #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
>  #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
>  #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
> +#define MIDR_THUNDERX_OTX_GEN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_OTX_GEN)
>  #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
>  #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
>  #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index b217941713a8..82ed09b363d6 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -41,6 +41,25 @@ is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
>  	return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
>  }
>  
> +static bool __maybe_unused
> +is_marvell_thunderx_otx_family(const struct arm64_cpu_capabilities *entry,
> +			       int scope)
> +{
> +	u32 model;
> +
> +	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
> +
> +	model = read_cpuid_id();
> +	/* 0xe8 mask will cover  0xA1 - 0xA3 and 0xB1 - 0xB6 series with
> +	 * 0xAF and 0xB8 as exceptions
> +	 */
> +	model &= MIDR_IMPLEMENTOR_MASK | (0x0e8 << MIDR_PARTNUM_SHIFT) |
> +		 MIDR_ARCHITECTURE_MASK;
> +
> +	/* This includes Thunderx, OcteonTx, OcteonTx2 family of processors */
> +	return model == MIDR_THUNDERX_OTX_GEN;
> +}
> +

No, please.  This is a version of the Kryo hack, only worse. We
*really* want to see an actual list of models and revisions, not an
obfuscated mask that covers HW that may or may not exist. All the
infrastructure is there to describe these constraints as data, just
make use of it.

>  static bool __maybe_unused
>  is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
>  {
> @@ -425,10 +444,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>  #endif
>  #ifdef CONFIG_CAVIUM_ERRATUM_23154
>  	{
> -	/* Cavium ThunderX, pass 1.x */
> -		.desc = "Cavium erratum 23154",
> +		.desc = "Cavium errata 23154 and 38545",
>  		.capability = ARM64_WORKAROUND_CAVIUM_23154,
> -		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
> +		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
> +		.matches = is_marvell_thunderx_otx_family,
>  	},
>  #endif
>  #ifdef CONFIG_CAVIUM_ERRATUM_27456

Thanks,

	M.
Linu Cherian March 4, 2022, 1:25 p.m. UTC | #2
Hi Marc,

> -----Original Message-----
> From: Marc Zyngier <maz@kernel.org>
> Sent: Friday, March 4, 2022 1:13 PM
> To: Linu Cherian <lcherian@marvell.com>
> Cc: tglx@linutronix.de; catalin.marinas@arm.com; will@kernel.org; linux-
> kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org;
> linuc.decode@gmail.com
> Subject: [EXT] Re: [PATCH V2] irqchip/gic-v3: Workaround Marvell erratum
> 38545 when reading IAR
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Fri, 04 Mar 2022 01:43:01 +0000,
> Linu Cherian <lcherian@marvell.com> wrote:
> >
> > When a IAR register read races with a GIC interrupt RELEASE event,
> > GIC-CPU interface could wrongly return a valid INTID to the CPU for an
> > interrupt that is already released(non activated) instead of 0x3ff.
> >
> > As a side effect, an interrupt handler could run twice, once with
> > interrupt priority and then with idle priority.
> >
> > As a workaround, gic_read_iar is updated so that it will return a
> > valid interrupt ID only if there is a change in the active priority
> > list after the IAR read on all the affected Silicons.
> >
> > Since there are silicon variants where both 23154 and 38545 are
> > applicable, workaround for erratum 23154 has been extended to address
> both of them.
> >
> > Signed-off-by: Linu Cherian <lcherian@marvell.com>
> > ---
> > Changes since V2:
> > - IIDR based quirk management done for 23154 has been reverted
> > - Extended existing 23154 errata to address 38545 as well,
> >   so that existing static keys are reused.
> > - Added MIDR based support macros to cover all the affected parts
> > - Changed the unlikely construct to likely construct in the workaround
> >   function.
> >
> >
> >
> >  Documentation/arm64/silicon-errata.rst |  2 +-
> >  arch/arm64/Kconfig                     |  8 ++++++--
> >  arch/arm64/include/asm/arch_gicv3.h    | 22 ++++++++++++++++++++--
> >  arch/arm64/include/asm/cputype.h       |  2 ++
> >  arch/arm64/kernel/cpu_errata.c         | 25 ++++++++++++++++++++++---
> >  5 files changed, 51 insertions(+), 8 deletions(-)
> >
> > diff --git a/Documentation/arm64/silicon-errata.rst
> > b/Documentation/arm64/silicon-errata.rst
> > index ea281dd75517..466cb9e89047 100644
> > --- a/Documentation/arm64/silicon-errata.rst
> > +++ b/Documentation/arm64/silicon-errata.rst
> > @@ -136,7 +136,7 @@ stable kernels.
> >  +----------------+-----------------+-----------------+-----------------------------+
> >  | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144
> |
> >
> > +----------------+-----------------+-----------------+----------------
> > -------------+
> > -| Cavium         | ThunderX GICv3  | #23154          |
> CAVIUM_ERRATUM_23154        |
> > +| Cavium         | ThunderX GICv3  | #23154,38545    |
> CAVIUM_ERRATUM_23154        |
> >  +----------------+-----------------+-----------------+-----------------------------+
> >  | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
> >
> > +----------------+-----------------+-----------------+----------------
> > -------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 09b885cc4db5..778cc2e22c21 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -891,13 +891,17 @@ config CAVIUM_ERRATUM_23144
> >  	  If unsure, say Y.
> >
> >  config CAVIUM_ERRATUM_23154
> > -	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
> > +	bool "Cavium errata 23154 and 38545: GICv3 lacks HW
> synchronisation"
> >  	default y
> >  	help
> > -	  The gicv3 of ThunderX requires a modified version for
> > +	  The ThunderX GICv3 implementation requires a modified version for
> >  	  reading the IAR status to ensure data synchronization
> >  	  (access to icc_iar1_el1 is not sync'ed before and after).
> >
> > +	  It also suffers from erratum 38545 (also present on Marvell's
> > +	  OcteonTX and OcteonTX2), resulting in deactivated interrupts being
> > +	  spuriously presented to the CPU interface.
> > +
> >  	  If unsure, say Y.
> >
> >  config CAVIUM_ERRATUM_27456
> > diff --git a/arch/arm64/include/asm/arch_gicv3.h
> > b/arch/arm64/include/asm/arch_gicv3.h
> > index 4ad22c3135db..b8fd7b1f9944 100644
> > --- a/arch/arm64/include/asm/arch_gicv3.h
> > +++ b/arch/arm64/include/asm/arch_gicv3.h
> > @@ -53,17 +53,35 @@ static inline u64 gic_read_iar_common(void)
> >   * The gicv3 of ThunderX requires a modified version for reading the
> >   * IAR status to ensure data synchronization (access to icc_iar1_el1
> >   * is not sync'ed before and after).
> > + *
> > + * Erratum 38545
> > + *
> > + * When a IAR register read races with a GIC interrupt RELEASE event,
> > + * GIC-CPU interface could wrongly return a valid INTID to the CPU
> > + * for an interrupt that is already released(non activated) instead of 0x3ff.
> > + *
> > + * To workaround this, return a valid interrupt ID only if there is a
> > + change
> > + * in the active priority list after the IAR read.
> > + *
> > + * Common function used for both the workarounds since,
> > + * 1. On Thunderx 88xx 1.x both erratas are applicable.
> > + * 2. Having extra nops doesn't add any side effects for Silicons where
> > + *    erratum 23154 is not applicable.
> >   */
> >  static inline u64 gic_read_iar_cavium_thunderx(void)
> >  {
> > -	u64 irqstat;
> > +	u64 irqstat, apr;
> >
> > +	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
> 
> Why only AP1R0? Does the HW only support 5 bits of priority? If it supports
> more, you need to check all the registers that may contain an active priority
> (0xa0 for a standard interrupt, 0x20 for a pNMI).
> 

Yes correct. HW supports only 5 bits of priority groups.
Will note this in the comment.

> >  	nops(8);
> >  	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
> >  	nops(4);
> >  	mb();
> >
> > -	return irqstat;
> > +	if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> > +		return irqstat;
> > +
> > +	return 0x3ff;
> 
> This should be ICC_IAR1_EL1_SPURIOUS.

Looks like we need fixes like below in couple of files to make use of this macro.

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5bc01e62c08a..d02b7339d21a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -18,7 +18,7 @@
 #include <linux/kvm_types.h>
 #include <linux/percpu.h>
 #include <linux/psci.h>
-#include <asm/arch_gicv3.h>
+#include <linux/irqchip/arm-gic-v3.h>

Should I consider fixing these ? 
At least  its builds fine for me with similar header fixes.

> 
> >  }
> >
> >  static inline void gic_write_ctlr(u32 val) diff --git
> > a/arch/arm64/include/asm/cputype.h
> b/arch/arm64/include/asm/cputype.h
> > index 999b9149f856..9407c5074a4f 100644
> > --- a/arch/arm64/include/asm/cputype.h
> > +++ b/arch/arm64/include/asm/cputype.h
> > @@ -80,6 +80,7 @@
> >
> >  #define APM_CPU_PART_POTENZA		0x000
> >
> > +#define CAVIUM_CPU_PART_THUNDERX_OTX_GEN 0x0A0
> 
> Is this an actual part number? What does 'GEN' stand for?
> 

No, this is not an actual part number. GEN was meant to be generic
to cover a group of part numbers.

> >  #define CAVIUM_CPU_PART_THUNDERX	0x0A1
> >  #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2
> >  #define CAVIUM_CPU_PART_THUNDERX_83XX	0x0A3
> > @@ -121,6 +122,7 @@
> >  #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM,
> > ARM_CPU_PART_CORTEX_A710)  #define MIDR_CORTEX_X2
> > MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
> #define
> > MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM,
> > ARM_CPU_PART_NEOVERSE_N2)
> > +#define MIDR_THUNDERX_OTX_GEN
> MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM,
> > +CAVIUM_CPU_PART_THUNDERX_OTX_GEN)
> >  #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM,
> CAVIUM_CPU_PART_THUNDERX)
> >  #define MIDR_THUNDERX_81XX
> MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM,
> > CAVIUM_CPU_PART_THUNDERX_81XX)  #define MIDR_THUNDERX_83XX
> > MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM,
> CAVIUM_CPU_PART_THUNDERX_83XX) diff
> > --git a/arch/arm64/kernel/cpu_errata.c
> > b/arch/arm64/kernel/cpu_errata.c index b217941713a8..82ed09b363d6
> > 100644
> > --- a/arch/arm64/kernel/cpu_errata.c
> > +++ b/arch/arm64/kernel/cpu_errata.c
> > @@ -41,6 +41,25 @@ is_affected_midr_range_list(const struct
> arm64_cpu_capabilities *entry,
> >  	return is_midr_in_range_list(read_cpuid_id(),
> > entry->midr_range_list);  }
> >
> > +static bool __maybe_unused
> > +is_marvell_thunderx_otx_family(const struct arm64_cpu_capabilities
> *entry,
> > +			       int scope)
> > +{
> > +	u32 model;
> > +
> > +	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
> > +
> > +	model = read_cpuid_id();
> > +	/* 0xe8 mask will cover  0xA1 - 0xA3 and 0xB1 - 0xB6 series with
> > +	 * 0xAF and 0xB8 as exceptions
> > +	 */
> > +	model &= MIDR_IMPLEMENTOR_MASK | (0x0e8 <<
> MIDR_PARTNUM_SHIFT) |
> > +		 MIDR_ARCHITECTURE_MASK;
> > +
> > +	/* This includes Thunderx, OcteonTx, OcteonTx2 family of processors
> */
> > +	return model == MIDR_THUNDERX_OTX_GEN; }
> > +
> 
> No, please.  This is a version of the Kryo hack, only worse. We
> *really* want to see an actual list of models and revisions, not an obfuscated
> mask that covers HW that may or may not exist. All the infrastructure is there
> to describe these constraints as data, just make use of it.
> 

Ack. Will change this to actual part numbers. 


> >  static bool __maybe_unused
> >  is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
> > { @@ -425,10 +444,10 @@ const struct arm64_cpu_capabilities
> > arm64_errata[] = {  #endif  #ifdef CONFIG_CAVIUM_ERRATUM_23154
> >  	{
> > -	/* Cavium ThunderX, pass 1.x */
> > -		.desc = "Cavium erratum 23154",
> > +		.desc = "Cavium errata 23154 and 38545",
> >  		.capability = ARM64_WORKAROUND_CAVIUM_23154,
> > -		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
> > +		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
> > +		.matches = is_marvell_thunderx_otx_family,
> >  	},
> >  #endif
> >  #ifdef CONFIG_CAVIUM_ERRATUM_27456
> 
> Thanks,
> 
> 	M.
> 
> --
> Without deviation from the norm, progress is not possible.
Marc Zyngier March 4, 2022, 4:44 p.m. UTC | #3
Hi Linu,

On Fri, 04 Mar 2022 13:25:42 +0000,
Linu Cherian <lcherian@marvell.com> wrote:
> 
> Hi Marc,
> 
> > >  static inline u64 gic_read_iar_cavium_thunderx(void)
> > >  {
> > > -	u64 irqstat;
> > > +	u64 irqstat, apr;
> > >
> > > +	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
> > 
> > Why only AP1R0? Does the HW only support 5 bits of priority? If it supports
> > more, you need to check all the registers that may contain an active priority
> > (0xa0 for a standard interrupt, 0x20 for a pNMI).
> > 
> 
> Yes correct. HW supports only 5 bits of priority groups.
> Will note this in the comment.

Thanks.

> 
> > >  	nops(8);
> > >  	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
> > >  	nops(4);
> > >  	mb();
> > >
> > > -	return irqstat;
> > > +	if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
> > > +		return irqstat;
> > > +
> > > +	return 0x3ff;
> > 
> > This should be ICC_IAR1_EL1_SPURIOUS.
> 
> Looks like we need fixes like below in couple of files to make use
> of this macro.
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 5bc01e62c08a..d02b7339d21a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -18,7 +18,7 @@
>  #include <linux/kvm_types.h>
>  #include <linux/percpu.h>
>  #include <linux/psci.h>
> -#include <asm/arch_gicv3.h>
> +#include <linux/irqchip/arm-gic-v3.h>
> 
> Should I consider fixing these ? 
> At least  its builds fine for me with similar header fixes.

Ah, crap. I'd like to avoid dragging too much of the linux/*.h into
asm/*.h, as this eventually leads to a pretty terrible mess. Never
mind then. I'll look into fixing it independently, and we'll live with
the 0x3ff for now.

> > > +#define CAVIUM_CPU_PART_THUNDERX_OTX_GEN 0x0A0
> > 
> > Is this an actual part number? What does 'GEN' stand for?
> > 
> 
> No, this is not an actual part number. GEN was meant to be generic
> to cover a group of part numbers.

The problem with that is that it eventually clashes with part numbers
that are allocated later, and your old kernel tries to apply a
workaround on the new HW... Sticking to the actual parts is a lot
safer.

Thanks,

	M.
diff mbox series

Patch

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ea281dd75517..466cb9e89047 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -136,7 +136,7 @@  stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX ITS    | #23144          | CAVIUM_ERRATUM_23144        |
 +----------------+-----------------+-----------------+-----------------------------+
-| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154        |
+| Cavium         | ThunderX GICv3  | #23154,38545    | CAVIUM_ERRATUM_23154        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Cavium         | ThunderX GICv3  | #38539          | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 09b885cc4db5..778cc2e22c21 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -891,13 +891,17 @@  config CAVIUM_ERRATUM_23144
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_23154
-	bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed"
+	bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation"
 	default y
 	help
-	  The gicv3 of ThunderX requires a modified version for
+	  The ThunderX GICv3 implementation requires a modified version for
 	  reading the IAR status to ensure data synchronization
 	  (access to icc_iar1_el1 is not sync'ed before and after).
 
+	  It also suffers from erratum 38545 (also present on Marvell's
+	  OcteonTX and OcteonTX2), resulting in deactivated interrupts being
+	  spuriously presented to the CPU interface.
+
 	  If unsure, say Y.
 
 config CAVIUM_ERRATUM_27456
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 4ad22c3135db..b8fd7b1f9944 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -53,17 +53,35 @@  static inline u64 gic_read_iar_common(void)
  * The gicv3 of ThunderX requires a modified version for reading the
  * IAR status to ensure data synchronization (access to icc_iar1_el1
  * is not sync'ed before and after).
+ *
+ * Erratum 38545
+ *
+ * When a IAR register read races with a GIC interrupt RELEASE event,
+ * GIC-CPU interface could wrongly return a valid INTID to the CPU
+ * for an interrupt that is already released(non activated) instead of 0x3ff.
+ *
+ * To workaround this, return a valid interrupt ID only if there is a change
+ * in the active priority list after the IAR read.
+ *
+ * Common function used for both the workarounds since,
+ * 1. On Thunderx 88xx 1.x both erratas are applicable.
+ * 2. Having extra nops doesn't add any side effects for Silicons where
+ *    erratum 23154 is not applicable.
  */
 static inline u64 gic_read_iar_cavium_thunderx(void)
 {
-	u64 irqstat;
+	u64 irqstat, apr;
 
+	apr = read_sysreg_s(SYS_ICC_AP1R0_EL1);
 	nops(8);
 	irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
 	nops(4);
 	mb();
 
-	return irqstat;
+	if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1)))
+		return irqstat;
+
+	return 0x3ff;
 }
 
 static inline void gic_write_ctlr(u32 val)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 999b9149f856..9407c5074a4f 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -80,6 +80,7 @@ 
 
 #define APM_CPU_PART_POTENZA		0x000
 
+#define CAVIUM_CPU_PART_THUNDERX_OTX_GEN 0x0A0
 #define CAVIUM_CPU_PART_THUNDERX	0x0A1
 #define CAVIUM_CPU_PART_THUNDERX_81XX	0x0A2
 #define CAVIUM_CPU_PART_THUNDERX_83XX	0x0A3
@@ -121,6 +122,7 @@ 
 #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
 #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
 #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
+#define MIDR_THUNDERX_OTX_GEN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_OTX_GEN)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b217941713a8..82ed09b363d6 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -41,6 +41,25 @@  is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
 	return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
 }
 
+static bool __maybe_unused
+is_marvell_thunderx_otx_family(const struct arm64_cpu_capabilities *entry,
+			       int scope)
+{
+	u32 model;
+
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+	model = read_cpuid_id();
+	/* 0xe8 mask will cover  0xA1 - 0xA3 and 0xB1 - 0xB6 series with
+	 * 0xAF and 0xB8 as exceptions
+	 */
+	model &= MIDR_IMPLEMENTOR_MASK | (0x0e8 << MIDR_PARTNUM_SHIFT) |
+		 MIDR_ARCHITECTURE_MASK;
+
+	/* This includes Thunderx, OcteonTx, OcteonTx2 family of processors */
+	return model == MIDR_THUNDERX_OTX_GEN;
+}
+
 static bool __maybe_unused
 is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
 {
@@ -425,10 +444,10 @@  const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
 	{
-	/* Cavium ThunderX, pass 1.x */
-		.desc = "Cavium erratum 23154",
+		.desc = "Cavium errata 23154 and 38545",
 		.capability = ARM64_WORKAROUND_CAVIUM_23154,
-		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = is_marvell_thunderx_otx_family,
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456