@@ -454,6 +454,20 @@ static void machine_set_mem_merge(Object *obj, bool value, Error **errp)
ms->mem_merge = value;
}
+static int machine_get_anon_alloc(Object *obj, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ return ms->anon_alloc;
+}
+
+static void machine_set_anon_alloc(Object *obj, int value, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ ms->anon_alloc = value;
+}
+
static bool machine_get_usb(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -1066,6 +1080,11 @@ static void machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "mem-merge",
"Enable/disable memory merge support");
+ object_class_property_add_enum(oc, "anon-alloc", "AnonAllocOption",
+ &AnonAllocOption_lookup,
+ machine_get_anon_alloc,
+ machine_set_anon_alloc);
+
object_class_property_add_bool(oc, "usb",
machine_get_usb, machine_set_usb);
object_class_property_set_description(oc, "usb",
@@ -1416,6 +1435,11 @@ static bool create_default_memdev(MachineState *ms, const char *path, Error **er
if (!object_property_set_int(obj, "size", ms->ram_size, errp)) {
goto out;
}
+ if (!object_property_set_bool(obj, "share",
+ ms->anon_alloc == ANON_ALLOC_OPTION_MEMFD,
+ errp)) {
+ goto out;
+ }
object_property_add_child(object_get_objects_root(), mc->default_ram_id,
obj);
/* Ensure backend's memory region name is equal to mc->default_ram_id */
@@ -383,6 +383,7 @@ struct MachineState {
bool enable_graphics;
ConfidentialGuestSupport *cgs;
HostMemoryBackend *memdev;
+ AnonAllocOption anon_alloc;
/*
* convenience alias to ram_memdev_id backend memory region
* or to numa container memory region
@@ -1881,3 +1881,17 @@
{ 'command': 'x-query-interrupt-controllers',
'returns': 'HumanReadableText',
'features': [ 'unstable' ]}
+
+##
+# @AnonAllocOption:
+#
+# An enumeration of the options for allocating anonymous guest memory.
+#
+# @mmap: allocate using mmap MAP_ANON
+#
+# @memfd: allocate using memfd_create
+#
+# Since: 9.1
+##
+{ 'enum': 'AnonAllocOption',
+ 'data': [ 'mmap', 'memfd' ] }
@@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" nvdimm=on|off controls NVDIMM support (default=off)\n"
" memory-encryption=@var{} memory encryption object to use (default=none)\n"
" hmat=on|off controls ACPI HMAT support (default=off)\n"
+ " anon-alloc=mmap|memfd allocate anonymous guest RAM using mmap MAP_ANON or memfd_create (default: mmap)\n"
" memory-backend='backend-id' specifies explicitly provided backend for main RAM (default=none)\n"
" cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
QEMU_ARCH_ALL)
@@ -101,6 +102,18 @@ SRST
Enables or disables ACPI Heterogeneous Memory Attribute Table
(HMAT) support. The default is off.
+ ``anon-alloc=mmap|memfd``
+ Allocate anonymous guest RAM using mmap MAP_ANON (the default)
+ or memfd_create. This affects memory-backend-ram objects,
+ RAM created with the global -m option but without an
+ associated memory-backend object and without the -mem-path
+ option, and various memory regions such as ROMs that are
+ allocated when devices are created. This option does not
+ affect memory-backend-file, memory-backend-memfd, or
+ memory-backend-epc objects.
+
+ Some migration modes require anon-alloc=memfd.
+
``memory-backend='id'``
An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
Allows to use a memory backend as main RAM.
@@ -1552,8 +1552,10 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
uint64_t size,
Error **errp)
{
+ uint32_t flags = (current_machine->anon_alloc == ANON_ALLOC_OPTION_MEMFD) ?
+ RAM_SHARED : 0;
return memory_region_init_ram_flags_nomigrate(mr, owner, name,
- size, 0, errp);
+ size, flags, errp);
}
bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr,
@@ -1713,8 +1715,10 @@ bool memory_region_init_rom_nomigrate(MemoryRegion *mr,
uint64_t size,
Error **errp)
{
+ uint32_t flags = (current_machine->anon_alloc == ANON_ALLOC_OPTION_MEMFD) ?
+ RAM_SHARED : 0;
if (!memory_region_init_ram_flags_nomigrate(mr, owner, name,
- size, 0, errp)) {
+ size, flags, errp)) {
return false;
}
mr->readonly = true;
@@ -1731,6 +1735,8 @@ bool memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
Error **errp)
{
Error *err = NULL;
+ uint32_t flags = (current_machine->anon_alloc == ANON_ALLOC_OPTION_MEMFD) ?
+ RAM_SHARED : 0;
assert(ops);
memory_region_init(mr, owner, name, size);
mr->ops = ops;
@@ -1738,7 +1744,7 @@ bool memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
mr->terminates = true;
mr->rom_device = true;
mr->destructor = memory_region_destructor_ram;
- mr->ram_block = qemu_ram_alloc(size, 0, mr, &err);
+ mr->ram_block = qemu_ram_alloc(size, flags, mr, &err);
if (err) {
mr->size = int128_zero();
object_unparent(OBJECT(mr));
@@ -47,6 +47,7 @@
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
+#include "qemu/memfd.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
@@ -54,6 +55,7 @@
#include "sysemu/hw_accel.h"
#include "sysemu/xen-mapcache.h"
#include "trace/trace-root.h"
+#include "trace.h"
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
#include <linux/falloc.h>
@@ -69,6 +71,8 @@
#include "qemu/pmem.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/options.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -1828,6 +1832,32 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
qemu_mutex_unlock_ramlist();
return;
}
+
+ } else if (new_block->flags & RAM_SHARED) {
+ size_t max_length = new_block->max_length;
+ MemoryRegion *mr = new_block->mr;
+ const char *name = memory_region_name(mr);
+
+ new_block->mr->align = QEMU_VMALLOC_ALIGN;
+
+ if (new_block->fd == -1) {
+ new_block->fd = qemu_memfd_create(name, max_length + mr->align,
+ 0, 0, 0, errp);
+ }
+
+ if (new_block->fd >= 0) {
+ int mfd = new_block->fd;
+ qemu_set_cloexec(mfd);
+ new_block->host = file_ram_alloc(new_block, max_length, mfd,
+ false, 0, errp);
+ }
+ if (!new_block->host) {
+ qemu_mutex_unlock_ramlist();
+ return;
+ }
+ memory_try_enable_merging(new_block->host, new_block->max_length);
+ free_on_error = true;
+
} else {
new_block->host = qemu_anon_ram_alloc(new_block->max_length,
&new_block->mr->align,
@@ -1911,6 +1941,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
ram_block_notify_add(new_block->host, new_block->used_length,
new_block->max_length);
}
+ trace_ram_block_add(memory_region_name(new_block->mr), new_block->flags,
+ new_block->fd, new_block->used_length,
+ new_block->max_length);
return;
out_free:
@@ -2097,8 +2130,11 @@ RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
void *host),
MemoryRegion *mr, Error **errp)
{
+ uint32_t flags = (current_machine->anon_alloc == ANON_ALLOC_OPTION_MEMFD) ?
+ RAM_SHARED : 0;
+ flags |= RAM_RESIZEABLE;
return qemu_ram_alloc_internal(size, maxsz, resized, NULL,
- RAM_RESIZEABLE, mr, errp);
+ flags, mr, errp);
}
static void reclaim_ramblock(RAMBlock *block)
@@ -38,3 +38,6 @@ dirtylimit_state_finalize(void)
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
+
+#physmem.c
+ram_block_add(const char *name, uint32_t flags, int fd, size_t used_length, size_t max_length) "%s, flags %u, fd %d, len %lu, maxlen %lu"
Allocate anonymous memory using mmap MAP_ANON or memfd_create depending on the value of the anon-alloc machine property. This affects memory-backend-ram objects, guest RAM created with the global -m option but without an associated memory-backend object and without the -mem-path option, and various memory regions such as ROMs that are allocated when devices are created. This option does not affect memory-backend-file, memory-backend-memfd, or memory-backend-epc objects. The memfd option is intended to support new migration modes, in which the memory region can be transferred in place to a new QEMU process, by sending the memfd file descriptor to the process. Memory contents are preserved, and if the mode also transfers device descriptors, then pages that are locked in memory for DMA remain locked. This behavior is a pre-requisite for supporting vfio, vdpa, and iommufd devices with the new modes. To access the same memory in the old and new QEMU processes, the memory must be mapped shared. Therefore, the implementation always sets RAM_SHARED if alloc-anon=memfd, except for memory-backend-ram, where the user must explicitly specify the share option. In lieu of defining a new RAM flag, at the lowest level the implementation uses RAM_SHARED with fd=-1 as the condition for calling memfd_create. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- hw/core/machine.c | 24 ++++++++++++++++++++++++ include/hw/boards.h | 1 + qapi/machine.json | 14 ++++++++++++++ qemu-options.hx | 13 +++++++++++++ system/memory.c | 12 +++++++++--- system/physmem.c | 38 +++++++++++++++++++++++++++++++++++++- system/trace-events | 3 +++ 7 files changed, 101 insertions(+), 4 deletions(-)