diff mbox series

[RFC,RESEND,6/6] system/hugetlb_ras: Replay lost BUS_MCEERR_AO signals on VM resume

Message ID 20240910100216.2744078-7-william.roche@oracle.com (mailing list archive)
State New, archived
Headers show
Series [RFC,RESEND,1/6] accel/kvm: SIGBUS handler should also deal with si_addr_lsb | expand

Commit Message

William Roche Sept. 10, 2024, 10:02 a.m. UTC
From: William Roche <william.roche@oracle.com>

In case the SIGBUS handler is triggered by a BUS_MCEERR_AO signal
and this handler needs to exit to let the VM pause during the memory
mapping change, this SIGBUS won't be regenerated when the VM resumes.
In this case we take note of this signal before exiting the handler
to replay it when the VM resumes.

Signed-off-by: William Roche <william.roche@oracle.com>
---
 system/hugetlbfs_ras.c | 60 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
diff mbox series

Patch

diff --git a/system/hugetlbfs_ras.c b/system/hugetlbfs_ras.c
index 90e399bbad..50f810f836 100644
--- a/system/hugetlbfs_ras.c
+++ b/system/hugetlbfs_ras.c
@@ -155,6 +155,56 @@  hugetlbfs_ras_backend_sz(void *addr)
     return rb->page_size;
 }
 
+
+/*
+ *  List of BUS_MCEERR_AO signals received before replaying.
+ *  Addition is serialized under large_hwpoison_mtx, but replay is
+ *  asynchronous.
+ */
+typedef struct LargeHWPoisonAO {
+    void  *addr;
+    QLIST_ENTRY(LargeHWPoisonAO) list;
+} LargeHWPoisonAO;
+
+static QLIST_HEAD(, LargeHWPoisonAO) large_hwpoison_ao =
+    QLIST_HEAD_INITIALIZER(large_hwpoison_ao);
+
+static void
+large_hwpoison_ao_record(void *addr)
+{
+    LargeHWPoisonAO *cel;
+
+    cel = g_new(LargeHWPoisonAO, 1);
+    cel->addr = addr;
+    QLIST_INSERT_HEAD(&large_hwpoison_ao, cel, list);
+}
+
+/* replay the possible BUS_MCEERR_AO recorded signal(s) */
+static void
+hugetlbfs_ras_ao_replay_bh(void)
+{
+    LargeHWPoisonAO *cel, *next;
+    QLIST_HEAD(, LargeHWPoisonAO) local_list =
+    QLIST_HEAD_INITIALIZER(local_list);
+
+    /*
+     * Copy to a local list to avoid holding large_hwpoison_mtx
+     * when calling kvm_on_sigbus().
+     */
+    qemu_mutex_lock(&large_hwpoison_mtx);
+    QLIST_FOREACH_SAFE(cel, &large_hwpoison_ao, list, next) {
+        QLIST_REMOVE(cel, list);
+        QLIST_INSERT_HEAD(&local_list, cel, list);
+    }
+    qemu_mutex_unlock(&large_hwpoison_mtx);
+
+    QLIST_FOREACH_SAFE(cel, &local_list, list, next) {
+        DPRINTF("AO on %p\n", cel->addr);
+        kvm_on_sigbus(BUS_MCEERR_AO, cel->addr, _PAGE_SHIFT);
+        g_free(cel);
+    }
+}
+
 /*
  * Report if this std page address of the given faulted large page should be
  * retried or if the current signal handler should continue to deal with it.
@@ -276,6 +326,15 @@  hugetlbfs_ras_correct(void **paddr, size_t *psz, int code)
     if (large_hwpoison_vm_stop) {
         DPRINTF("Handler exit requested as on page %p\n", page->page_addr);
         *paddr = NULL;
+        /*
+         * BUS_MCEERR_AO specific case: this signal is not regenerated,
+         * we keep it to replay when the VM is ready to take it.
+         */
+        if (code == BUS_MCEERR_AO) {
+            large_hwpoison_ao_record(page->first_poison ? page->first_poison :
+                reported_addr);
+        }
+
     }
     qemu_mutex_unlock(&large_hwpoison_mtx);
 
@@ -522,6 +581,7 @@  static void coroutine_hugetlbfs_ras_vmstop_bh(void *opaque)
 static void coroutine_hugetlbfs_ras_vmstart_bh(void *opaque)
 {
     vm_start();
+    hugetlbfs_ras_ao_replay_bh();
 }
 
 static void *