
[v4,11/21] arm64: cpufeature: Detect CPU RAS Extensions

Message ID 20171019145807.23251-12-james.morse@arm.com
State New, archived

Commit Message

James Morse Oct. 19, 2017, 2:57 p.m. UTC
From: Xie XiuQi <xiexiuqi@huawei.com>

ARM's v8.2 Extensions add support for Reliability, Availability and
Serviceability (RAS). On CPUs with these extensions, system software
can use additional barriers to isolate errors and determine if faults
are pending.

Add cpufeature detection and a barrier in the context-switch code.
There is no need to use alternatives for this as CPUs that don't
support this feature will treat the instruction as a nop.

Platform level RAS support may require additional firmware support.

Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
[Rebased, added esb and config option, reworded commit message]
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig               | 16 ++++++++++++++++
 arch/arm64/include/asm/barrier.h |  1 +
 arch/arm64/include/asm/cpucaps.h |  3 ++-
 arch/arm64/include/asm/sysreg.h  |  2 ++
 arch/arm64/kernel/cpufeature.c   | 13 +++++++++++++
 arch/arm64/kernel/process.c      |  3 +++
 6 files changed, 37 insertions(+), 1 deletion(-)
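
Since ESB is allocated in the hint instruction space (hence the "hint #16"
encoding in the barrier.h hunk below), CPUs without the RAS extension
execute it as a NOP, which is why no alternative is needed. Code that
genuinely depends on RAS being present can instead test the capability this
patch adds; a minimal sketch using the kernel's cpucaps helper (the wrapper
function is illustrative, not part of the patch):

	#include <asm/barrier.h>
	#include <asm/cpufeature.h>

	/*
	 * Illustrative only: esb() itself needs no guard, as it is a NOP
	 * on CPUs without the extension; the capability check matters for
	 * code that reads the new RAS registers.
	 */
	static void ras_sync_if_supported(void)
	{
		if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
			esb();
	}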

Comments

Will Deacon Oct. 31, 2017, 1:14 p.m. UTC | #1
On Thu, Oct 19, 2017 at 03:57:57PM +0100, James Morse wrote:
> From: Xie XiuQi <xiexiuqi@huawei.com>
> 
> ARM's v8.2 Extensions add support for Reliability, Availability and
> Serviceability (RAS). On CPUs with these extensions, system software
> can use additional barriers to isolate errors and determine if faults
> are pending.
> 
> Add cpufeature detection and a barrier in the context-switch code.
> There is no need to use alternatives for this as CPUs that don't
> support this feature will treat the instruction as a nop.
> 
> Platform level RAS support may require additional firmware support.
> 
> Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
> [Rebased, added esb and config option, reworded commit message]
> Signed-off-by: James Morse <james.morse@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
>  arch/arm64/Kconfig               | 16 ++++++++++++++++
>  arch/arm64/include/asm/barrier.h |  1 +
>  arch/arm64/include/asm/cpucaps.h |  3 ++-
>  arch/arm64/include/asm/sysreg.h  |  2 ++
>  arch/arm64/kernel/cpufeature.c   | 13 +++++++++++++
>  arch/arm64/kernel/process.c      |  3 +++
>  6 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 70dfe4e9ccc5..b68f5e93baac 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -973,6 +973,22 @@ config ARM64_PMEM
>  	  operations if DC CVAP is not supported (following the behaviour of
>  	  DC CVAP itself if the system does not define a point of persistence).
>  
> +config ARM64_RAS_EXTN
> +	bool "Enable support for RAS CPU Extensions"
> +	default y
> +	help
> +	  CPUs that support the Reliability, Availability and Serviceability
> +	  (RAS) Extensions, part of ARMv8.2, are able to track faults and
> +	  errors, classify them and report them to software.
> +
> +	  On CPUs with these extensions system software can use additional
> +	  barriers to determine if faults are pending and read the
> +	  classification from a new set of registers.
> +
> +	  Selecting this feature will allow the kernel to use these barriers
> +	  and access the new registers if the system supports the extension.
> +	  Platform RAS features may additionally depend on firmware support.
> +
>  endmenu
>  
>  config ARM64_MODULE_CMODEL_LARGE
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index 0fe7e43b7fbc..8b0a0eb67625 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -30,6 +30,7 @@
>  #define isb()		asm volatile("isb" : : : "memory")
>  #define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
>  #define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
> +#define esb()		asm volatile("hint #16"  : : : "memory")
>  
>  #define mb()		dsb(sy)
>  #define rmb()		dsb(ld)
> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index 8da621627d7c..4820d441bfb9 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -40,7 +40,8 @@
>  #define ARM64_WORKAROUND_858921			19
>  #define ARM64_WORKAROUND_CAVIUM_30115		20
>  #define ARM64_HAS_DCPOP				21
> +#define ARM64_HAS_RAS_EXTN			22
>  
> -#define ARM64_NCAPS				22
> +#define ARM64_NCAPS				23
>  
>  #endif /* __ASM_CPUCAPS_H */
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index f707fed5886f..64e2a80fd749 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -332,6 +332,7 @@
>  #define ID_AA64ISAR1_DPB_SHIFT		0
>  
>  /* id_aa64pfr0 */
> +#define ID_AA64PFR0_RAS_SHIFT		28
>  #define ID_AA64PFR0_GIC_SHIFT		24
>  #define ID_AA64PFR0_ASIMD_SHIFT		20
>  #define ID_AA64PFR0_FP_SHIFT		16
> @@ -340,6 +341,7 @@
>  #define ID_AA64PFR0_EL1_SHIFT		4
>  #define ID_AA64PFR0_EL0_SHIFT		0
>  
> +#define ID_AA64PFR0_RAS_V1		0x1
>  #define ID_AA64PFR0_FP_NI		0xf
>  #define ID_AA64PFR0_FP_SUPPORTED	0x0
>  #define ID_AA64PFR0_ASIMD_NI		0xf
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index cd52d365d1f0..0fc017b55cb1 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -125,6 +125,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
>  };
>  
>  static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
> +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_RAS_SHIFT, 4, 0),

We probably want FTR_LOWER_SAFE here now, right? (we changed the other
fields in for-next/core).

> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 2dc0f8482210..5e5d2f0a1d0a 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -365,6 +365,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>  	 */
>  	dsb(ish);
>  
> +	/* Deliver any pending SError from prev */
> +	esb();

I'm assuming this is going to be expensive. What if we moved it to switch_mm
instead? Do we actually need thread granularity for error isolation?

Will

James Morse Nov. 2, 2017, 12:15 p.m. UTC | #2
Hi Will,

On 31/10/17 13:14, Will Deacon wrote:
> On Thu, Oct 19, 2017 at 03:57:57PM +0100, James Morse wrote:
>> From: Xie XiuQi <xiexiuqi@huawei.com>
>>
>> ARM's v8.2 Extensions add support for Reliability, Availability and
>> Serviceability (RAS). On CPUs with these extensions, system software
>> can use additional barriers to isolate errors and determine if faults
>> are pending.
>>
>> Add cpufeature detection and a barrier in the context-switch code.
>> There is no need to use alternatives for this as CPUs that don't
>> support this feature will treat the instruction as a nop.
>>
>> Platform level RAS support may require additional firmware support.

>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index cd52d365d1f0..0fc017b55cb1 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -125,6 +125,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
>>  };
>>  
>>  static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
>> +	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_RAS_SHIFT, 4, 0),

> We probably want FTR_LOWER_SAFE here now, right? (we changed the other
> fields in for-next/core).

Ah, yes.
(Looks like some copy-and-paste)
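
The fixed entry would then be:

	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),

so on a mismatched system the sanitised value is the lowest RAS version
common to all CPUs.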


>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 2dc0f8482210..5e5d2f0a1d0a 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -365,6 +365,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>>  	 */
>>  	dsb(ish);
>>  
>> +	/* Deliver any pending SError from prev */
>> +	esb();

> I'm assuming this is going to be expensive.

I'm hoping not, but without numbers to prove otherwise...


> What if we moved it to switch_mm
> instead? Do we actually need thread granularity for error isolation?

(after a verbal discussion with Will:)

This would be needed to blame the correct thread, but until we have kernel-first
handling this is moot, as do_serror() will panic() regardless.

So, let's drop the esb() here and decide what to do if/when we get kernel-first
handling. If that only acts on groups of threads, then switch_mm is a better
place for it.
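
As a purely hypothetical sketch of that alternative (nothing like it is in
this series), the barrier would move out of __switch_to() and into the
mm-switch path, along the lines of:

	/*
	 * Hypothetical placement: synchronise errors at address-space
	 * granularity instead of on every thread switch. Unconditional
	 * use is still safe, as esb() is a NOP without the extension.
	 */
	static inline void mm_switch_error_sync(void)
	{
		esb();
	}

called from somewhere in the check_and_switch_context() path.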

In the meantime, if we see RAS SError panic()s, we should remember the CPU may
have just switched task, which in practice will probably be obvious from the
stack trace.

There is no firmware-first angle here, as SError is unmasked on either side of this,
unlike in the KVM example.

I'll apply the same logic to the KVM version in patch 20...



Thanks,

James

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 70dfe4e9ccc5..b68f5e93baac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -973,6 +973,22 @@ config ARM64_PMEM
 	  operations if DC CVAP is not supported (following the behaviour of
 	  DC CVAP itself if the system does not define a point of persistence).
 
+config ARM64_RAS_EXTN
+	bool "Enable support for RAS CPU Extensions"
+	default y
+	help
+	  CPUs that support the Reliability, Availability and Serviceability
+	  (RAS) Extensions, part of ARMv8.2, are able to track faults and
+	  errors, classify them and report them to software.
+
+	  On CPUs with these extensions system software can use additional
+	  barriers to determine if faults are pending and read the
+	  classification from a new set of registers.
+
+	  Selecting this feature will allow the kernel to use these barriers
+	  and access the new registers if the system supports the extension.
+	  Platform RAS features may additionally depend on firmware support.
+
 endmenu
 
 config ARM64_MODULE_CMODEL_LARGE
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fe7e43b7fbc..8b0a0eb67625 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -30,6 +30,7 @@ 
 #define isb()		asm volatile("isb" : : : "memory")
 #define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
 #define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
+#define esb()		asm volatile("hint #16"  : : : "memory")
 
 #define mb()		dsb(sy)
 #define rmb()		dsb(ld)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8da621627d7c..4820d441bfb9 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -40,7 +40,8 @@ 
 #define ARM64_WORKAROUND_858921			19
 #define ARM64_WORKAROUND_CAVIUM_30115		20
 #define ARM64_HAS_DCPOP				21
+#define ARM64_HAS_RAS_EXTN			22
 
-#define ARM64_NCAPS				22
+#define ARM64_NCAPS				23
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f707fed5886f..64e2a80fd749 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -332,6 +332,7 @@ 
 #define ID_AA64ISAR1_DPB_SHIFT		0
 
 /* id_aa64pfr0 */
+#define ID_AA64PFR0_RAS_SHIFT		28
 #define ID_AA64PFR0_GIC_SHIFT		24
 #define ID_AA64PFR0_ASIMD_SHIFT		20
 #define ID_AA64PFR0_FP_SHIFT		16
@@ -340,6 +341,7 @@ 
 #define ID_AA64PFR0_EL1_SHIFT		4
 #define ID_AA64PFR0_EL0_SHIFT		0
 
+#define ID_AA64PFR0_RAS_V1		0x1
 #define ID_AA64PFR0_FP_NI		0xf
 #define ID_AA64PFR0_FP_SUPPORTED	0x0
 #define ID_AA64PFR0_ASIMD_NI		0xf
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index cd52d365d1f0..0fc017b55cb1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -125,6 +125,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_RAS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
 	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -900,6 +901,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 1,
 	},
 #endif
+#ifdef CONFIG_ARM64_RAS_EXTN
+	{
+		.desc = "RAS Extension Support",
+		.capability = ARM64_HAS_RAS_EXTN,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR0_RAS_SHIFT,
+		.min_field_value = ID_AA64PFR0_RAS_V1,
+	},
+#endif /* CONFIG_ARM64_RAS_EXTN */
 	{},
 };
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2dc0f8482210..5e5d2f0a1d0a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -365,6 +365,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	dsb(ish);
 
+	/* Deliver any pending SError from prev */
+	esb();
+
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
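
The capability entry above matches via has_cpuid_feature(): it reads
SYS_ID_AA64PFR0_EL1, extracts the unsigned 4-bit field at
ID_AA64PFR0_RAS_SHIFT and requires it to be at least ID_AA64PFR0_RAS_V1.
A simplified sketch of that comparison (the kernel routes this through its
generic cpuid_feature_extract_field() helpers; this standalone function is
only illustrative):

	/*
	 * Simplified sketch of the match done by has_cpuid_feature() for
	 * the RAS entry: unsigned 4-bit field, >= min_field_value.
	 */
	static bool id_aa64pfr0_has_ras(u64 pfr0)
	{
		u64 ras = (pfr0 >> ID_AA64PFR0_RAS_SHIFT) & 0xf;

		return ras >= ID_AA64PFR0_RAS_V1;
	}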