diff mbox series

[v3,3/7] accel/kvm: Report the loss of a large memory page

Message ID 20241125142718.3373203-4-william.roche@oracle.com (mailing list archive)
State New
Headers show
Series hugetlbfs memory HW error fixes | expand

Commit Message

William Roche Nov. 25, 2024, 2:27 p.m. UTC
From: William Roche <william.roche@oracle.com>

In case of a large page impacted by a memory error, complete
the existing QEMU error message to indicate that the error is
injected in the VM. Also add a similar message on the ARM
platform.
Only when a large page is impacted, we now report:
...Memory Error at QEMU addr X and GUEST addr Y on lost large page SIZE@ADDR of type...

Signed-off-by: William Roche <william.roche@oracle.com>
---
 accel/kvm/kvm-all.c   |  4 ----
 system/physmem.c      | 12 ++++++------
 target/arm/kvm.c      | 13 +++++++++++++
 target/i386/kvm/kvm.c | 18 ++++++++++++++----
 4 files changed, 33 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 24c0c4ce3f..8a47aa7258 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1286,10 +1286,6 @@  static void kvm_unpoison_all(void *param)
 void kvm_hwpoison_page_add(ram_addr_t ram_addr)
 {
     HWPoisonPage *page;
-    size_t page_size = qemu_ram_pagesize_from_addr(ram_addr);
-
-    if (page_size > TARGET_PAGE_SIZE)
-        ram_addr = QEMU_ALIGN_DOWN(ram_addr, page_size);
 
     QLIST_FOREACH(page, &hwpoison_page_list, list) {
         if (page->ram_addr == ram_addr) {
diff --git a/system/physmem.c b/system/physmem.c
index 26711df2d2..b8daf42d20 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2201,7 +2201,7 @@  static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t size,
     }
     if (area != vaddr) {
         error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
-                     size, addr);
+                     size, block->offset + offset);
         exit(1);
     }
 }
@@ -2227,7 +2227,7 @@  void qemu_ram_remap(ram_addr_t addr)
                 abort();
             } else {
                 if (ram_block_discard_range(block, offset + block->fd_offset,
-                                            length) != 0) {
+                                            page_size) != 0) {
                     /*
                      * Fold back to using mmap(), but it cannot zap pagecache
                      * pages, only anonymous pages. As soon as we might have
@@ -2236,15 +2236,15 @@  void qemu_ram_remap(ram_addr_t addr)
                      * We don't take the risk of using mmap and fail now.
                      */
                     if (block->fd >= 0 && (qemu_ram_is_shared(block) ||
-                        (length > TARGET_PAGE_SIZE))) {
+                        (page_size > TARGET_PAGE_SIZE))) {
                         error_report("Memory poison recovery failure addr: "
                                      RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
-                                     length, addr);
+                                     page_size, addr);
                         exit(1);
                     }
                     qemu_ram_remap_mmap(block, vaddr, page_size, offset);
-                    memory_try_enable_merging(vaddr, size);
-                    qemu_ram_setup_dump(vaddr, size);
+                    memory_try_enable_merging(vaddr, page_size);
+                    qemu_ram_setup_dump(vaddr, page_size);
                 }
             }
 
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 7b6812c0de..d92b195851 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2366,6 +2366,8 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
 {
     ram_addr_t ram_addr;
     hwaddr paddr;
+    size_t page_size;
+    char lp_msg[57];
 
     assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
 
@@ -2373,6 +2375,14 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         ram_addr = qemu_ram_addr_from_host(addr);
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+            page_size = qemu_ram_pagesize_from_addr(ram_addr);
+            if (page_size > TARGET_PAGE_SIZE) {
+                ram_addr = ROUND_DOWN(ram_addr, page_size);
+                sprintf(lp_msg, " on lost large page "
+                    RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
+            } else {
+                lp_msg[0] = '\0';
+            }
             kvm_hwpoison_page_add(ram_addr);
             /*
              * If this is a BUS_MCEERR_AR, we know we have been called
@@ -2389,6 +2399,9 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
                 kvm_cpu_synchronize_state(c);
                 if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
                     kvm_inject_arm_sea(c);
+                    error_report("Guest Memory Error at QEMU addr %p and "
+                        "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+                        addr, paddr, lp_msg, "BUS_MCEERR_AR");
                 } else {
                     error_report("failed to record the error");
                     abort();
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 8e17942c3b..182985b159 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -741,6 +741,8 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
     CPUX86State *env = &cpu->env;
     ram_addr_t ram_addr;
     hwaddr paddr;
+    size_t page_size;
+    char lp_msg[57];
 
     /* If we get an action required MCE, it has been injected by KVM
      * while the VM was running.  An action optional MCE instead should
@@ -753,6 +755,14 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         ram_addr = qemu_ram_addr_from_host(addr);
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+            page_size = qemu_ram_pagesize_from_addr(ram_addr);
+            if (page_size > TARGET_PAGE_SIZE) {
+                ram_addr = ROUND_DOWN(ram_addr, page_size);
+                sprintf(lp_msg, " on lost large page "
+                        RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
+            } else {
+                lp_msg[0] = '\0';
+            }
             kvm_hwpoison_page_add(ram_addr);
             kvm_mce_inject(cpu, paddr, code);
 
@@ -763,12 +773,12 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
              */
             if (code == BUS_MCEERR_AR) {
                 error_report("Guest MCE Memory Error at QEMU addr %p and "
-                    "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
-                    addr, paddr, "BUS_MCEERR_AR");
+                    "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+                    addr, paddr, lp_msg, "BUS_MCEERR_AR");
             } else {
                  warn_report("Guest MCE Memory Error at QEMU addr %p and "
-                     "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
-                     addr, paddr, "BUS_MCEERR_AO");
+                     "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+                     addr, paddr, lp_msg, "BUS_MCEERR_AO");
             }
 
             return;