Message ID | 1504609566-66318-2-git-send-email-xiexiuqi@huawei.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On Tue, Sep 05, 2017 at 07:06:04PM +0800, Xie XiuQi wrote: > With ARM v8.2 RAS Extension, SEA are usually triggered when memory errors > are consumed. In some cases, if the error address is in a clean page or a > read-only page, there is a chance to recover. Such as error occurs in a > instruction page, we can reread this page from disk instead of killing process. > > Because memory_failure() may sleep, we can not call it directly in SEA exception > context. So we saved faulting physical address associated with a process in the > ghes handler and set __TIF_SEA_NOTIFY. When we return from SEA exception context > and get into do_notify_resume() before the process running, we could check it > and call memory_failure() to do recovery. It's safe, because we are in process > context. > > Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> > Signed-off-by: Wang Xiongfeng <wangxiongfeng2@huawei.com> > --- > arch/arm64/Kconfig | 11 +++ > arch/arm64/include/asm/ras.h | 36 +++++++++ > arch/arm64/include/asm/thread_info.h | 4 +- > arch/arm64/kernel/Makefile | 1 + > arch/arm64/kernel/ras.c | 143 +++++++++++++++++++++++++++++++++++ > arch/arm64/kernel/signal.c | 8 ++ > arch/arm64/mm/fault.c | 27 +++++-- > drivers/acpi/apei/ghes.c | 4 +- > 8 files changed, 223 insertions(+), 11 deletions(-) > create mode 100644 arch/arm64/include/asm/ras.h > create mode 100644 arch/arm64/kernel/ras.c Please integrate scripts/checkpatch.pl into your patch creation workflow and run all patches through it before submitting: ERROR: code indent should use tabs where possible #200: FILE: arch/arm64/kernel/ras.c:42: + atomic_t inuse;$ WARNING: please, no spaces at the start of a line #200: FILE: arch/arm64/kernel/ras.c:42: + atomic_t inuse;$ ERROR: code indent should use tabs where possible #201: FILE: arch/arm64/kernel/ras.c:43: + struct task_struct *t;$ WARNING: please, no spaces at the start of a line #201: FILE: arch/arm64/kernel/ras.c:43: + struct task_struct *t;$ ERROR: code indent should use tabs 
where possible #202: FILE: arch/arm64/kernel/ras.c:44: + __u64 paddr;$ WARNING: please, no spaces at the start of a line #202: FILE: arch/arm64/kernel/ras.c:44: + __u64 paddr;$ ERROR: code indent should use tabs where possible #207: FILE: arch/arm64/kernel/ras.c:49: + struct sea_info *si;$ WARNING: please, no spaces at the start of a line #207: FILE: arch/arm64/kernel/ras.c:49: + struct sea_info *si;$ ERROR: code indent should use tabs where possible #209: FILE: arch/arm64/kernel/ras.c:51: + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++) {$ WARNING: please, no spaces at the start of a line #209: FILE: arch/arm64/kernel/ras.c:51: + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++) {$ ERROR: code indent should use tabs where possible #210: FILE: arch/arm64/kernel/ras.c:52: + if (atomic_cmpxchg(&si->inuse, 0, 1) == 0) {$ WARNING: please, no spaces at the start of a line #210: FILE: arch/arm64/kernel/ras.c:52: + if (atomic_cmpxchg(&si->inuse, 0, 1) == 0) {$ ERROR: code indent should use tabs where possible #211: FILE: arch/arm64/kernel/ras.c:53: + si->t = current;$ WARNING: please, no spaces at the start of a line #211: FILE: arch/arm64/kernel/ras.c:53: + si->t = current;$ ERROR: code indent should use tabs where possible #212: FILE: arch/arm64/kernel/ras.c:54: + si->paddr = addr;$ WARNING: please, no spaces at the start of a line #212: FILE: arch/arm64/kernel/ras.c:54: + si->paddr = addr;$ ERROR: code indent should use tabs where possible #213: FILE: arch/arm64/kernel/ras.c:55: + return true;$ WARNING: please, no spaces at the start of a line #213: FILE: arch/arm64/kernel/ras.c:55: + return true;$ ERROR: code indent should use tabs where possible #214: FILE: arch/arm64/kernel/ras.c:56: + }$ WARNING: please, no spaces at the start of a line #214: FILE: arch/arm64/kernel/ras.c:56: + }$ ERROR: code indent should use tabs where possible #215: FILE: arch/arm64/kernel/ras.c:57: + }$ WARNING: please, no spaces at the start of a line #215: FILE: 
arch/arm64/kernel/ras.c:57: + }$ ERROR: code indent should use tabs where possible #223: FILE: arch/arm64/kernel/ras.c:65: + struct sea_info *si;$ WARNING: please, no spaces at the start of a line #223: FILE: arch/arm64/kernel/ras.c:65: + struct sea_info *si;$ ERROR: code indent should use tabs where possible #225: FILE: arch/arm64/kernel/ras.c:67: + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++)$ WARNING: please, no spaces at the start of a line #225: FILE: arch/arm64/kernel/ras.c:67: + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++)$ ERROR: code indent should use tabs where possible #226: FILE: arch/arm64/kernel/ras.c:68: + if (atomic_read(&si->inuse) && si->t == current)$ WARNING: please, no spaces at the start of a line #226: FILE: arch/arm64/kernel/ras.c:68: + if (atomic_read(&si->inuse) && si->t == current)$ ERROR: code indent should use tabs where possible #227: FILE: arch/arm64/kernel/ras.c:69: + return si;$ WARNING: please, no spaces at the start of a line #227: FILE: arch/arm64/kernel/ras.c:69: + return si;$ ERROR: code indent should use tabs where possible #228: FILE: arch/arm64/kernel/ras.c:70: + return NULL;$ WARNING: please, no spaces at the start of a line #228: FILE: arch/arm64/kernel/ras.c:70: + return NULL;$ ERROR: code indent should use tabs where possible #233: FILE: arch/arm64/kernel/ras.c:75: + atomic_set(&si->inuse, 0);$ WARNING: please, no spaces at the start of a line #233: FILE: arch/arm64/kernel/ras.c:75: + atomic_set(&si->inuse, 0);$ WARNING: braces {} are not necessary for single statement blocks #265: FILE: arch/arm64/kernel/ras.c:107: + if (memory_failure(pfn, 0, flags) < 0) { + fail++; + } WARNING: braces {} are not necessary for single statement blocks #293: FILE: arch/arm64/kernel/ras.c:135: + if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) { + info_saved |= sea_save_info(err_info->physical_fault_addr); + } total: 17 errors, 21 warnings, 299 lines checked NOTE: For some of the reported defects, 
checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. NOTE: Whitespace errors detected. You may wish to use scripts/cleanpatch or scripts/cleanfile Your patch has style problems, please review. NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS.
Hi Borislav, On 2017/9/6 18:12, Borislav Petkov wrote: > On Tue, Sep 05, 2017 at 07:06:04PM +0800, Xie XiuQi wrote: >> With ARM v8.2 RAS Extension, SEA are usually triggered when memory errors >> are consumed. In some cases, if the error address is in a clean page or a >> read-only page, there is a chance to recover. Such as error occurs in a >> instruction page, we can reread this page from disk instead of killing process. >> >> Because memory_failure() may sleep, we can not call it directly in SEA exception >> context. So we saved faulting physical address associated with a process in the >> ghes handler and set __TIF_SEA_NOTIFY. When we return from SEA exception context >> and get into do_notify_resume() before the process running, we could check it >> and call memory_failure() to do recovery. It's safe, because we are in process >> context. >> >> Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> >> Signed-off-by: Wang Xiongfeng <wangxiongfeng2@huawei.com> >> --- >> arch/arm64/Kconfig | 11 +++ >> arch/arm64/include/asm/ras.h | 36 +++++++++ >> arch/arm64/include/asm/thread_info.h | 4 +- >> arch/arm64/kernel/Makefile | 1 + >> arch/arm64/kernel/ras.c | 143 +++++++++++++++++++++++++++++++++++ >> arch/arm64/kernel/signal.c | 8 ++ >> arch/arm64/mm/fault.c | 27 +++++-- >> drivers/acpi/apei/ghes.c | 4 +- >> 8 files changed, 223 insertions(+), 11 deletions(-) >> create mode 100644 arch/arm64/include/asm/ras.h >> create mode 100644 arch/arm64/kernel/ras.c > > Please integrate scripts/checkpatch.pl into your patch creation workflow > and run all patches through it before submitting: Sorry for my mistake. I'll fix it, thanks. 
> > ERROR: code indent should use tabs where possible > #200: FILE: arch/arm64/kernel/ras.c:42: > + atomic_t inuse;$ > > WARNING: please, no spaces at the start of a line > #200: FILE: arch/arm64/kernel/ras.c:42: > + atomic_t inuse;$ > > ERROR: code indent should use tabs where possible > #201: FILE: arch/arm64/kernel/ras.c:43: > + struct task_struct *t;$ > > WARNING: please, no spaces at the start of a line > #201: FILE: arch/arm64/kernel/ras.c:43: > + struct task_struct *t;$ > > ERROR: code indent should use tabs where possible > #202: FILE: arch/arm64/kernel/ras.c:44: > + __u64 paddr;$ > > WARNING: please, no spaces at the start of a line > #202: FILE: arch/arm64/kernel/ras.c:44: > + __u64 paddr;$ > > ERROR: code indent should use tabs where possible > #207: FILE: arch/arm64/kernel/ras.c:49: > + struct sea_info *si;$ > > WARNING: please, no spaces at the start of a line > #207: FILE: arch/arm64/kernel/ras.c:49: > + struct sea_info *si;$ > > ERROR: code indent should use tabs where possible > #209: FILE: arch/arm64/kernel/ras.c:51: > + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++) {$ > > WARNING: please, no spaces at the start of a line > #209: FILE: arch/arm64/kernel/ras.c:51: > + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++) {$ > > ERROR: code indent should use tabs where possible > #210: FILE: arch/arm64/kernel/ras.c:52: > + if (atomic_cmpxchg(&si->inuse, 0, 1) == 0) {$ > > WARNING: please, no spaces at the start of a line > #210: FILE: arch/arm64/kernel/ras.c:52: > + if (atomic_cmpxchg(&si->inuse, 0, 1) == 0) {$ > > ERROR: code indent should use tabs where possible > #211: FILE: arch/arm64/kernel/ras.c:53: > + si->t = current;$ > > WARNING: please, no spaces at the start of a line > #211: FILE: arch/arm64/kernel/ras.c:53: > + si->t = current;$ > > ERROR: code indent should use tabs where possible > #212: FILE: arch/arm64/kernel/ras.c:54: > + si->paddr = addr;$ > > WARNING: please, no spaces at the start of a line > #212: FILE: 
arch/arm64/kernel/ras.c:54: > + si->paddr = addr;$ > > ERROR: code indent should use tabs where possible > #213: FILE: arch/arm64/kernel/ras.c:55: > + return true;$ > > WARNING: please, no spaces at the start of a line > #213: FILE: arch/arm64/kernel/ras.c:55: > + return true;$ > > ERROR: code indent should use tabs where possible > #214: FILE: arch/arm64/kernel/ras.c:56: > + }$ > > WARNING: please, no spaces at the start of a line > #214: FILE: arch/arm64/kernel/ras.c:56: > + }$ > > ERROR: code indent should use tabs where possible > #215: FILE: arch/arm64/kernel/ras.c:57: > + }$ > > WARNING: please, no spaces at the start of a line > #215: FILE: arch/arm64/kernel/ras.c:57: > + }$ > > ERROR: code indent should use tabs where possible > #223: FILE: arch/arm64/kernel/ras.c:65: > + struct sea_info *si;$ > > WARNING: please, no spaces at the start of a line > #223: FILE: arch/arm64/kernel/ras.c:65: > + struct sea_info *si;$ > > ERROR: code indent should use tabs where possible > #225: FILE: arch/arm64/kernel/ras.c:67: > + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++)$ > > WARNING: please, no spaces at the start of a line > #225: FILE: arch/arm64/kernel/ras.c:67: > + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++)$ > > ERROR: code indent should use tabs where possible > #226: FILE: arch/arm64/kernel/ras.c:68: > + if (atomic_read(&si->inuse) && si->t == current)$ > > WARNING: please, no spaces at the start of a line > #226: FILE: arch/arm64/kernel/ras.c:68: > + if (atomic_read(&si->inuse) && si->t == current)$ > > ERROR: code indent should use tabs where possible > #227: FILE: arch/arm64/kernel/ras.c:69: > + return si;$ > > WARNING: please, no spaces at the start of a line > #227: FILE: arch/arm64/kernel/ras.c:69: > + return si;$ > > ERROR: code indent should use tabs where possible > #228: FILE: arch/arm64/kernel/ras.c:70: > + return NULL;$ > > WARNING: please, no spaces at the start of a line > #228: FILE: arch/arm64/kernel/ras.c:70: > + return 
NULL;$ > > ERROR: code indent should use tabs where possible > #233: FILE: arch/arm64/kernel/ras.c:75: > + atomic_set(&si->inuse, 0);$ > > WARNING: please, no spaces at the start of a line > #233: FILE: arch/arm64/kernel/ras.c:75: > + atomic_set(&si->inuse, 0);$ > > WARNING: braces {} are not necessary for single statement blocks > #265: FILE: arch/arm64/kernel/ras.c:107: > + if (memory_failure(pfn, 0, flags) < 0) { > + fail++; > + } > > WARNING: braces {} are not necessary for single statement blocks > #293: FILE: arch/arm64/kernel/ras.c:135: > + if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) { > + info_saved |= sea_save_info(err_info->physical_fault_addr); > + } > > total: 17 errors, 21 warnings, 299 lines checked > > NOTE: For some of the reported defects, checkpatch may be able to > mechanically convert to the typical style using --fix or --fix-inplace. > > NOTE: Whitespace errors detected. > You may wish to use scripts/cleanpatch or scripts/cleanfile > > Your patch has style problems, please review. > > NOTE: If any of the errors are false positives, please report > them to the maintainer, see CHECKPATCH in MAINTAINERS. >
Hi Xie, [auto build test ERROR on pm/linux-next] [also build test ERROR on v4.13] [cannot apply to arm64/for-next/core next-20170907] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Xie-XiuQi/arm64-ras-support-sea-error-recovery/20170908-062353 base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next config: x86_64-allyesdebian (attached as .config) compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 All errors (new ones prefixed by >>): >> drivers/acpi/apei/ghes.c:54:21: fatal error: asm/ras.h: No such file or directory #include <asm/ras.h> ^ compilation terminated. vim +54 drivers/acpi/apei/ghes.c 49 50 #include <acpi/actbl1.h> 51 #include <acpi/ghes.h> 52 #include <acpi/apei.h> 53 #include <asm/tlbflush.h> > 54 #include <asm/ras.h> 55 #include <ras/ras_event.h> 56 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dfd9086..7d44589 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -640,6 +640,17 @@ config HOTPLUG_CPU Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. +config ARM64_ERR_RECOV + bool "Support arm64 RAS error recovery" + depends on ACPI_APEI_SEA && MEMORY_FAILURE + help + With ARM v8.2 RAS Extension, SEA are usually triggered when memory errors + are consumed. In some cases, if the error address is in a clean page or a + read-only page, there is a chance to recover. Such as error occurs in an + instruction page, we can reread this page from disk instead of killing process. + + Say Y if unsure. + # Common NUMA Features config NUMA bool "Numa Memory Allocation and Scheduler Support" diff --git a/arch/arm64/include/asm/ras.h b/arch/arm64/include/asm/ras.h new file mode 100644 index 0000000..e174f95 --- /dev/null +++ b/arch/arm64/include/asm/ras.h @@ -0,0 +1,36 @@ +/* + * ARM64 SEA error recovery support + * + * Copyright 2017 Huawei Technologies Co., Ltd. + * Author: Xie XiuQi <xiexiuqi@huawei.com> + * Author: Wang Xiongfeng <wangxiongfeng2@huawei.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + +#ifndef _ASM_RAS_H +#define _ASM_RAS_H + +#include <linux/cper.h> +#include <linux/ras.h> +#include <acpi/ghes.h> + +extern void sea_notify_process(void); + +#ifdef CONFIG_ARM64_ERR_RECOV +extern void arm_process_error(struct ghes *ghes, struct cper_sec_proc_arm *err); +#else +static inline void arm_process_error(struct ghes *ghes, struct cper_sec_proc_arm *err) +{ + log_arm_hw_error(err); +} +#endif /* CONFIG_ARM64_ERR_RECOV */ + +#endif /*_ASM_RAS_H*/ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 46c3b93..4b10131 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -86,6 +86,7 @@ struct thread_info { #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ +#define TIF_SEA_NOTIFY 5 /* notify to do an error recovery */ #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 @@ -102,6 +103,7 @@ struct thread_info { #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) #define _TIF_NOHZ (1 << TIF_NOHZ) +#define _TIF_SEA_NOTIFY (1 << TIF_SEA_NOTIFY) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -111,7 +113,7 @@ struct thread_info { #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE) + _TIF_UPROBE|_TIF_SEA_NOTIFY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index f2b4e81..ba3abf8 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -43,6 +43,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o 
arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ARM64_ERR_RECOV) += ras.o arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c new file mode 100644 index 0000000..5710b2e --- /dev/null +++ b/arch/arm64/kernel/ras.c @@ -0,0 +1,143 @@ +/* + * ARM64 SEA error recovery support + * + * Copyright 2017 Huawei Technologies Co., Ltd. + * Author: Xie XiuQi <xiexiuqi@huawei.com> + * Author: Wang Xiongfeng <wangxiongfeng2@huawei.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + +#include <linux/kernel.h> +#include <linux/cper.h> +#include <linux/mm.h> +#include <linux/preempt.h> +#include <linux/acpi.h> +#include <linux/sched/signal.h> +#include <linux/ras.h> + +#include <acpi/actbl1.h> +#include <acpi/ghes.h> +#include <acpi/apei.h> + +#include <asm/thread_info.h> +#include <asm/atomic.h> +#include <asm/ras.h> + +/* + * Need to save faulting physical address associated with a process + * in the sea ghes handler some place where we can grab it back + * later in sea_notify_process() + */ +#define SEA_INFO_MAX 16 + +struct sea_info { + atomic_t inuse; + struct task_struct *t; + __u64 paddr; +} sea_info[SEA_INFO_MAX]; + +static bool sea_save_info(__u64 addr) +{ + struct sea_info *si; + + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++) { + if (atomic_cmpxchg(&si->inuse, 0, 1) == 0) { + si->t = current; + si->paddr = addr; + return true; + } + } + + pr_err("Too many concurrent recoverable errors\n"); + return false; +} + +static struct sea_info *sea_find_info(void) +{ + struct sea_info *si; + + for (si = sea_info; si < &sea_info[SEA_INFO_MAX]; si++) + if (atomic_read(&si->inuse) && si->t == current) + return si; + return NULL; +} + +static void sea_clear_info(struct sea_info *si) +{ + atomic_set(&si->inuse, 0); +} + +/* + * Called in process context that interrupted by SEA and marked with + * TIF_SEA_NOTIFY, just before returning to erroneous userland. + * This code is allowed to sleep. + * Attempt possible recovery such as calling the high level VM handler to + * process any corrupted pages, and kill/signal current process if required. + * Action required errors are handled here. 
+ */ +void sea_notify_process(void) +{ + unsigned long pfn; + int fail = 0, flags = MF_ACTION_REQUIRED; + struct sea_info *si = sea_find_info(); + + if (!si) + panic("Lost physical address for consumed uncorrectable error"); + + clear_thread_flag(TIF_SEA_NOTIFY); + do { + pfn = si->paddr >> PAGE_SHIFT; + + + pr_err("Uncorrected hardware memory error in user-access at %llx\n", + si->paddr); + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. + */ + if (memory_failure(pfn, 0, flags) < 0) { + fail++; + } + sea_clear_info(si); + + si = sea_find_info(); + } while (si); + + if (fail) { + pr_err("Memory error not recovered\n"); + force_sig(SIGBUS, current); + } +} + +void arm_process_error(struct ghes *ghes, struct cper_sec_proc_arm *err) +{ + int i; + bool info_saved = false; + struct cper_arm_err_info *err_info; + + log_arm_hw_error(err); + + if ((ghes->generic->notify.type != ACPI_HEST_NOTIFY_SEA) || + (ghes->estatus->error_severity != CPER_SEV_RECOVERABLE)) + return; + + err_info = (struct cper_arm_err_info *)(err + 1); + for (i = 0; i < err->err_info_num; i++, err_info++) { + if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) { + info_saved |= sea_save_info(err_info->physical_fault_addr); + } + } + + if (info_saved) + set_thread_flag(TIF_SEA_NOTIFY); +} + diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 089c3747..71e314e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -38,6 +38,7 @@ #include <asm/fpsimd.h> #include <asm/signal32.h> #include <asm/vdso.h> +#include <asm/ras.h> /* * Do a signal return; undo the signal stack. These are aligned to 128-bit. @@ -749,6 +750,13 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, * Update the trace code with the current status. 
*/ trace_hardirqs_off(); + +#ifdef CONFIG_ARM64_ERR_RECOV + /* notify userspace of pending SEAs */ + if (thread_flags & _TIF_SEA_NOTIFY) + sea_notify_process(); +#endif /* CONFIG_ARM64_ERR_RECOV */ + do { if (thread_flags & _TIF_NEED_RESCHED) { schedule(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1f22a41..b38476d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -594,14 +594,25 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) nmi_exit(); } - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = 0; - if (esr & ESR_ELx_FnV) - info.si_addr = NULL; - else - info.si_addr = (void __user *)addr; - arm64_notify_die("", regs, &info, esr); + if (user_mode(regs)) { + if (test_thread_flag(TIF_SEA_NOTIFY)) + return ret; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = 0; + if (esr & ESR_ELx_FnV) + info.si_addr = NULL; + else + info.si_addr = (void __user *)addr; + + current->thread.fault_address = 0; + current->thread.fault_code = esr; + force_sig_info(info.si_signo, &info, current); + } else { + die("Uncorrected hardware memory error in kernel-access\n", + regs, esr); + } return ret; } diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d661d45..502335c 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -46,12 +46,12 @@ #include <linux/nmi.h> #include <linux/sched/clock.h> #include <linux/uuid.h> -#include <linux/ras.h> #include <acpi/actbl1.h> #include <acpi/ghes.h> #include <acpi/apei.h> #include <asm/tlbflush.h> +#include <asm/ras.h> #include <ras/ras_event.h> #include "apei-internal.h" @@ -520,7 +520,7 @@ static void ghes_do_proc(struct ghes *ghes, else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); - log_arm_hw_error(err); + arm_process_error(ghes, err); } else { void *err = acpi_hest_get_payload(gdata);