@@ -435,7 +435,8 @@ static void ghes_kick_task_work(struct callback_head *head)
estatus_node = container_of(head, struct ghes_estatus_node, task_work);
if (IS_ENABLED(CONFIG_ACPI_APEI_MEMORY_FAILURE))
- memory_failure_queue_kick(estatus_node->task_work_cpu);
+ if (memory_failure_queue_kick(estatus_node->task_work_cpu))
+ arch_apei_do_recovery_failed();
estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
node_len = GHES_ESTATUS_NODE_LEN(cper_estatus_len(estatus));
@@ -3290,7 +3290,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
unsigned long count, int mf_flags);
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
-extern void memory_failure_queue_kick(int cpu);
+extern int memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
@@ -2240,12 +2240,12 @@ void memory_failure_queue(unsigned long pfn, int flags)
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
-static void memory_failure_work_func(struct work_struct *work)
+static int __memory_failure_work_func(struct work_struct *work)
{
struct memory_failure_cpu *mf_cpu;
struct memory_failure_entry entry = { 0, };
unsigned long proc_flags;
- int gotten;
+ int gotten, ret = 0, result;
mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
@@ -2254,24 +2254,34 @@ static void memory_failure_work_func(struct work_struct *work)
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
- if (entry.flags & MF_SOFT_OFFLINE)
+ if (entry.flags & MF_SOFT_OFFLINE) {
soft_offline_page(entry.pfn, entry.flags);
- else
- memory_failure(entry.pfn, entry.flags);
+ } else {
+ result = memory_failure(entry.pfn, entry.flags);
+ if (ret == 0 && result != 0)
+ ret = result;
+ }
}
+
+ return ret;
+}
+
+static void memory_failure_work_func(struct work_struct *work)
+{
+ __memory_failure_work_func(work);
}
/*
* Process memory_failure work queued on the specified CPU.
* Used to avoid return-to-userspace racing with the memory_failure workqueue.
*/
-void memory_failure_queue_kick(int cpu)
+int memory_failure_queue_kick(int cpu)
{
struct memory_failure_cpu *mf_cpu;
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
cancel_work_sync(&mf_cpu->work);
- memory_failure_work_func(&mf_cpu->work);
+ return __memory_failure_work_func(&mf_cpu->work);
}
static int __init memory_failure_init(void)
memory_failure() may not always recover successfully. In the synchronous external data abort case, a failed memory_failure() recovery must be handled. When the recovery fails, the common helper function arch_apei_do_recovery_failed() is invoked. On the arm64 platform, we just send a SIGBUS. Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> --- drivers/acpi/apei/ghes.c | 3 ++- include/linux/mm.h | 2 +- mm/memory-failure.c | 24 +++++++++++++++++------- 3 files changed, 20 insertions(+), 9 deletions(-)