diff mbox series

[v5,2/6] system/physmem: poisoned memory discard on reboot

Message ID 20250110211405.2284121-3-william.roche@oracle.com (mailing list archive)
State New, archived
Headers show
Series Poisoned memory recovery on reboot | expand

Commit Message

William Roche Jan. 10, 2025, 9:14 p.m. UTC
From: William Roche <william.roche@oracle.com>

Repair poisoned memory location(s), calling ram_block_discard_range():
punching a hole in the backend file when necessary and regenerating
a usable memory.
If the kernel doesn't support the madvise calls used by this function
and we are dealing with anonymous memory, fall back to remapping the
location(s).

Signed-off-by: William Roche <william.roche@oracle.com>
---
 system/physmem.c | 57 ++++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 21 deletions(-)

Comments

David Hildenbrand Jan. 14, 2025, 2:07 p.m. UTC | #1
On 10.01.25 22:14, “William Roche wrote:
> From: William Roche <william.roche@oracle.com>
> 
> Repair poisoned memory location(s), calling ram_block_discard_range():
> punching a hole in the backend file when necessary and regenerating
> a usable memory.
> If the kernel doesn't support the madvise calls used by this function
> and we are dealing with anonymous memory, fall back to remapping the
> location(s).
> 
> Signed-off-by: William Roche <william.roche@oracle.com>
> ---
>   system/physmem.c | 57 ++++++++++++++++++++++++++++++------------------
>   1 file changed, 36 insertions(+), 21 deletions(-)
> 
> diff --git a/system/physmem.c b/system/physmem.c
> index 7a87548f99..ae1caa97d8 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -2180,13 +2180,32 @@ void qemu_ram_free(RAMBlock *block)
>   }
>   
>   #ifndef _WIN32
> +/* Simply remap the given VM memory location from start to start+length */
> +static void qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length)
> +{
> +    int flags, prot;
> +    void *area;
> +    void *host_startaddr = block->host + start;
> +
> +    assert(block->fd < 0);
> +    flags = MAP_FIXED | MAP_ANONYMOUS;
> +    flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
> +    flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
> +    prot = PROT_READ;
> +    prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
> +    area = mmap(host_startaddr, length, prot, flags, -1, 0);
> +    if (area != host_startaddr) {
> +        error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
> +                     length, start);
> +        exit(1);
> +    }

Can we return an error and have a single error printed in the caller?

return area != host_startaddr ? -errno : 0;

> +}
> +
>   void qemu_ram_remap(ram_addr_t addr)
>   {
>       RAMBlock *block;
>       ram_addr_t offset;
> -    int flags;
> -    void *area, *vaddr;
> -    int prot;
> +    void *vaddr;
>       size_t page_size;
>   
>       RAMBLOCK_FOREACH(block) {
> @@ -2202,24 +2221,20 @@ void qemu_ram_remap(ram_addr_t addr)
>               } else if (xen_enabled()) {
>                   abort();
>               } else {
> -                flags = MAP_FIXED;
> -                flags |= block->flags & RAM_SHARED ?
> -                         MAP_SHARED : MAP_PRIVATE;
> -                flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
> -                prot = PROT_READ;
> -                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
> -                if (block->fd >= 0) {
> -                    area = mmap(vaddr, page_size, prot, flags, block->fd,
> -                                offset + block->fd_offset);
> -                } else {
> -                    flags |= MAP_ANONYMOUS;
> -                    area = mmap(vaddr, page_size, prot, flags, -1, 0);
> -                }
> -                if (area != vaddr) {
> -                    error_report("Could not remap addr: "
> -                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
> -                                 page_size, addr);
> -                    exit(1);
> +                if (ram_block_discard_range(block, offset, page_size) != 0) {
> +                    /*
> +                     * Fall back to using mmap() only for anonymous mapping,
> +                     * as if a backing file is associated we may not be able
> +                     * to recover the memory in all cases.
> +                     * So don't take the risk of using only mmap and fail now.
> +                     */
> +                    if (block->fd >= 0) {
> +                        error_report("Memory poison recovery failure addr: "
> +                                     RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
> +                                     page_size, addr);

See my error message proposal as reply to v4 patch #1: we should similarly
just print rb->idstr, offset and page_size like ram_block_discard_range() does.

ram_addr_t is weird and not helpful for users.


To have a single error

if (ram_block_discard_range(block, offset, page_size) != 0) {
	/* ...
	if (block->fd >= 0 || qemu_ram_remap_mmap(block, offset, page_size)) {
		error_report() ... // use proposal from my reply to v4 patch #1
		exit(1);
	}
}
William Roche Jan. 27, 2025, 9:16 p.m. UTC | #2
On 1/14/25 15:07, David Hildenbrand wrote:
> On 10.01.25 22:14, “William Roche wrote:
>> From: William Roche <william.roche@oracle.com>
>>
>> Repair poisoned memory location(s), calling ram_block_discard_range():
>> punching a hole in the backend file when necessary and regenerating
>> a usable memory.
>> If the kernel doesn't support the madvise calls used by this function
>> and we are dealing with anonymous memory, fall back to remapping the
>> location(s).
>>
>> Signed-off-by: William Roche <william.roche@oracle.com>
>> ---
>>   system/physmem.c | 57 ++++++++++++++++++++++++++++++------------------
>>   1 file changed, 36 insertions(+), 21 deletions(-)
>>
>> diff --git a/system/physmem.c b/system/physmem.c
>> index 7a87548f99..ae1caa97d8 100644
>> --- a/system/physmem.c
>> +++ b/system/physmem.c
>> @@ -2180,13 +2180,32 @@ void qemu_ram_free(RAMBlock *block)
>>   }
>>   #ifndef _WIN32
>> +/* Simply remap the given VM memory location from start to 
>> start+length */
>> +static void qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, 
>> size_t length)
>> +{
>> +    int flags, prot;
>> +    void *area;
>> +    void *host_startaddr = block->host + start;
>> +
>> +    assert(block->fd < 0);
>> +    flags = MAP_FIXED | MAP_ANONYMOUS;
>> +    flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
>> +    flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
>> +    prot = PROT_READ;
>> +    prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
>> +    area = mmap(host_startaddr, length, prot, flags, -1, 0);
>> +    if (area != host_startaddr) {
>> +        error_report("Could not remap addr: " RAM_ADDR_FMT "@" 
>> RAM_ADDR_FMT "",
>> +                     length, start);
>> +        exit(1);
>> +    }
> 
> Can we return an error and have a single error printed in the caller?
> 
> return area != host_startaddr ? -errno : 0;

Done.

> 
>> +}
>> +
>>   void qemu_ram_remap(ram_addr_t addr)
>>   {
>>       RAMBlock *block;
>>       ram_addr_t offset;
>> -    int flags;
>> -    void *area, *vaddr;
>> -    int prot;
>> +    void *vaddr;
>>       size_t page_size;
>>       RAMBLOCK_FOREACH(block) {
>> @@ -2202,24 +2221,20 @@ void qemu_ram_remap(ram_addr_t addr)
>>               } else if (xen_enabled()) {
>>                   abort();
>>               } else {
>> -                flags = MAP_FIXED;
>> -                flags |= block->flags & RAM_SHARED ?
>> -                         MAP_SHARED : MAP_PRIVATE;
>> -                flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
>> -                prot = PROT_READ;
>> -                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
>> -                if (block->fd >= 0) {
>> -                    area = mmap(vaddr, page_size, prot, flags, block->fd,
>> -                                offset + block->fd_offset);
>> -                } else {
>> -                    flags |= MAP_ANONYMOUS;
>> -                    area = mmap(vaddr, page_size, prot, flags, -1, 0);
>> -                }
>> -                if (area != vaddr) {
>> -                    error_report("Could not remap addr: "
>> -                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
>> -                                 page_size, addr);
>> -                    exit(1);
>> +                if (ram_block_discard_range(block, offset, page_size) != 0) {
>> +                    /*
>> +                     * Fall back to using mmap() only for anonymous mapping,
>> +                     * as if a backing file is associated we may not be able
>> +                     * to recover the memory in all cases.
>> +                     * So don't take the risk of using only mmap and fail now.
>> +                     */
>> +                    if (block->fd >= 0) {
>> +                        error_report("Memory poison recovery failure addr: "
>> +                                     RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
>> +                                     page_size, addr);
> 
> See my error message proposal as reply to v4 patch #1: we should similarly
> just print rb->idstr, offset and page_size like 
> ram_block_discard_range() does.
> 
> ram_addr_t is weird and not helpful for users.
> 
> 
> To have a single error
> 
> if (ram_block_discard_range(block, offset, page_size) != 0) {
>      /* ...
>      if (block->fd >= 0 || qemu_ram_remap_mmap(block, offset, page_size)) {
>          error_report() ... // use proposal from my reply to v4 patch #1
>          exit(1);
>      }
> }
> 

Done.
diff mbox series

Patch

diff --git a/system/physmem.c b/system/physmem.c
index 7a87548f99..ae1caa97d8 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2180,13 +2180,32 @@  void qemu_ram_free(RAMBlock *block)
 }
 
 #ifndef _WIN32
+/* Simply remap the given VM memory location from start to start+length */
+static void qemu_ram_remap_mmap(RAMBlock *block, uint64_t start, size_t length)
+{
+    int flags, prot;
+    void *area;
+    void *host_startaddr = block->host + start;
+
+    assert(block->fd < 0);
+    flags = MAP_FIXED | MAP_ANONYMOUS;
+    flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+    flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+    prot = PROT_READ;
+    prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+    area = mmap(host_startaddr, length, prot, flags, -1, 0);
+    if (area != host_startaddr) {
+        error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+                     length, start);
+        exit(1);
+    }
+}
+
 void qemu_ram_remap(ram_addr_t addr)
 {
     RAMBlock *block;
     ram_addr_t offset;
-    int flags;
-    void *area, *vaddr;
-    int prot;
+    void *vaddr;
     size_t page_size;
 
     RAMBLOCK_FOREACH(block) {
@@ -2202,24 +2221,20 @@  void qemu_ram_remap(ram_addr_t addr)
             } else if (xen_enabled()) {
                 abort();
             } else {
-                flags = MAP_FIXED;
-                flags |= block->flags & RAM_SHARED ?
-                         MAP_SHARED : MAP_PRIVATE;
-                flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
-                prot = PROT_READ;
-                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
-                if (block->fd >= 0) {
-                    area = mmap(vaddr, page_size, prot, flags, block->fd,
-                                offset + block->fd_offset);
-                } else {
-                    flags |= MAP_ANONYMOUS;
-                    area = mmap(vaddr, page_size, prot, flags, -1, 0);
-                }
-                if (area != vaddr) {
-                    error_report("Could not remap addr: "
-                                 RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
-                                 page_size, addr);
-                    exit(1);
+                if (ram_block_discard_range(block, offset, page_size) != 0) {
+                    /*
+                     * Fall back to using mmap() only for anonymous mapping,
+                     * as if a backing file is associated we may not be able
+                     * to recover the memory in all cases.
+                     * So don't take the risk of using only mmap and fail now.
+                     */
+                    if (block->fd >= 0) {
+                        error_report("Memory poison recovery failure addr: "
+                                     RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+                                     page_size, addr);
+                        exit(1);
+                    }
+                    qemu_ram_remap_mmap(block, offset, page_size);
                 }
                 memory_try_enable_merging(vaddr, page_size);
                 qemu_ram_setup_dump(vaddr, page_size);