@@ -824,9 +824,6 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
const struct fault_info *inf;
unsigned long siaddr;
- if (do_apei_claim_sea(regs))
- return 0;
-
inf = esr_to_fault_info(esr);
if (esr & ESR_ELx_FnV) {
siaddr = 0;
@@ -838,6 +835,19 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
*/
siaddr = untagged_addr(far);
}
+
+ if (do_apei_claim_sea(regs)) {
+ if (!(current->flags & (PF_KTHREAD |
+ PF_USER_WORKER |
+ PF_WQ_WORKER |
+ PF_IO_WORKER))) {
+ set_thread_esr(0, esr);
+ arm64_force_sig_fault(inf->sig, inf->code, siaddr,
+ "Uncorrected memory error on access to poison memory\n");
+ }
+ return 0;
+ }
+
arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
return 0;
For an SEA exception, the kernel needs to take some action to recover from the memory error, such as isolating the poisoned page and killing the failing thread, both of which are done in memory_failure(). During testing, the failing thread could not be killed due to this issue [1]. Here, I temporarily work around this issue by sending signals to user processes (!(PF_KTHREAD|PF_IO_WORKER|PF_WQ_WORKER|PF_USER_WORKER)) in do_sea(). After [1] is merged, this patch should be reverted, otherwise SIGBUS will be sent repeatedly. [1] https://lore.kernel.org/lkml/20240204080144.7977-1-xueshuai@linux.alibaba.com/ Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> --- arch/arm64/mm/fault.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)