diff mbox series

[v3,2/7] system/physmem: poisoned memory discard on reboot

Message ID 20241125142718.3373203-3-william.roche@oracle.com (mailing list archive)
State New
Headers show
Series hugetlbfs memory HW error fixes | expand

Commit Message

“William Roche Nov. 25, 2024, 2:27 p.m. UTC
From: William Roche <william.roche@oracle.com>

Repair memory locations, calling ram_block_discard_range(),
punching a hole in the backend file when necessary and regenerate
a usable memory.
Fall back to unmap/remap the memory location(s) if the kernel doesn't
support the madvise calls used by ram_block_discard_range().

Signed-off-by: William Roche <william.roche@oracle.com>
---
 system/physmem.c | 69 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 23 deletions(-)
diff mbox series

Patch

diff --git a/system/physmem.c b/system/physmem.c
index 410eabd29d..26711df2d2 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2180,13 +2180,37 @@  void qemu_ram_free(RAMBlock *block)
 }
 
 #ifndef _WIN32
+/* Try to recover the given location using mmap */
+static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t size,
+                                ram_addr_t offset)
+{
+    int flags, prot;
+    void *area;
+
+    flags = MAP_FIXED;
+    flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+    flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+    prot = PROT_READ;
+    prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+    if (block->fd >= 0) {
+        area = mmap(vaddr, size, prot, flags, block->fd,
+                    offset + block->fd_offset);
+    } else {
+        flags |= MAP_ANONYMOUS;
+        area = mmap(vaddr, size, prot, flags, -1, 0);
+    }
+    if (area != vaddr) {
+        error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+                     size, addr);
+        exit(1);
+    }
+}
+
 void qemu_ram_remap(ram_addr_t addr)
 {
     RAMBlock *block;
     ram_addr_t offset;
-    int flags;
-    void *area, *vaddr;
-    int prot;
+    void *vaddr;
     size_t page_size;
 
     RAMBLOCK_FOREACH(block) {
@@ -2202,27 +2226,26 @@  void qemu_ram_remap(ram_addr_t addr)
             } else if (xen_enabled()) {
                 abort();
             } else {
-                flags = MAP_FIXED;
-                flags |= block->flags & RAM_SHARED ?
-                         MAP_SHARED : MAP_PRIVATE;
-                flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
-                prot = PROT_READ;
-                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
-                if (block->fd >= 0) {
-                    area = mmap(vaddr, page_size, prot, flags, block->fd,
-                                offset + block->fd_offset);
-                } else {
-                    flags |= MAP_ANONYMOUS;
-                    area = mmap(vaddr, page_size, prot, flags, -1, 0);
-                }
-                if (area != vaddr) {
-                    error_report("Could not remap addr: "
-                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
-                                 page_size, addr);
-                    exit(1);
+                if (ram_block_discard_range(block, offset + block->fd_offset,
+                                            length) != 0) {
+                    /*
+                     * Fold back to using mmap(), but it cannot zap pagecache
+                     * pages, only anonymous pages. As soon as we might have
+                     * pagecache pages involved (either private or shared
+                     * mapping), we must be careful.
+                     * We don't take the risk of using mmap and fail now.
+                     */
+                    if (block->fd >= 0 && (qemu_ram_is_shared(block) ||
+                        (length > TARGET_PAGE_SIZE))) {
+                        error_report("Memory poison recovery failure addr: "
+                                     RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+                                     length, addr);
+                        exit(1);
+                    }
+                    qemu_ram_remap_mmap(block, vaddr, page_size, offset);
+                    memory_try_enable_merging(vaddr, size);
+                    qemu_ram_setup_dump(vaddr, size);
                 }
-                memory_try_enable_merging(vaddr, page_size);
-                qemu_ram_setup_dump(vaddr, page_size);
             }
 
             break;