diff mbox series

[v1,09/15] memory-device,vhost: Support memory devices that dynamically consume multiple memslots

Message ID 20230616092654.175518-10-david@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtio-mem: Expose device memory through multiple memslots | expand

Commit Message

David Hildenbrand June 16, 2023, 9:26 a.m. UTC
We want to support memory devices that have a dynamically managed memory
region container as device memory region. This device memory region maps
multiple RAM memory subregions (e.g., aliases to the same RAM memory region),
whereby these subregions can be (un)mapped on demand.

Each RAM subregion will consume a memslot in KVM and vhost, resulting in
such a new device consuming memslots dynamically, and initially usually
0. We already track the number of used vs. required memslots for all
memory devices. From that, we can derive the number of reserved memslots that
must not be used. We only have to add a way for memory devices to expose
how many memslots they require, such that we can properly consider them as
required (and as reserved until actually used). Let's properly document
what's supported and what's not.

The target use case is virtio-mem, which will dynamically map parts of a
source RAM memory region into the container device region using aliases,
consuming one memslot per alias.

Extend the vhost memslot check accordingly and give a hint that adding
vhost devices before adding memory devices might make it work (especially
virtio-mem devices, once they determine the number of memslots to use
at runtime).

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/mem/memory-device.c         | 36 +++++++++++++++++++++++++++++++++-
 hw/virtio/vhost.c              | 18 +++++++++++++----
 include/hw/mem/memory-device.h |  7 +++++++
 stubs/qmp_memory_device.c      |  5 +++++
 4 files changed, 61 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 752258333b..2e6536c841 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -88,6 +88,40 @@  static unsigned int get_free_memslots(void)
     return MIN(vhost_get_free_memslots(), kvm_get_free_memslots());
 }
 
+/* Memslots that memory devices require but do not use yet (i.e., reserved). */
+static unsigned int get_reserved_memslots(MachineState *ms)
+{
+    if (ms->device_memory->required_memslots >=
+        ms->device_memory->used_memslots) {
+        return ms->device_memory->required_memslots -
+               ms->device_memory->used_memslots;
+    }
+    /* Unexpected; the memory notifier already warned about this case. */
+    return 0;
+}
+
+/* Memslots currently reserved by memory devices of the current machine. */
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    /* Without a device memory region, report nothing as reserved. */
+    return current_machine->device_memory ?
+           get_reserved_memslots(current_machine) : 0;
+}
+
+/* Memslots that are still free and not reserved by memory devices. */
+static unsigned int get_available_memslots(MachineState *ms)
+{
+    const unsigned int free = get_free_memslots();
+    const unsigned int reserved = get_reserved_memslots(ms);
+
+    if (free < reserved) {
+        warn_report_once("The reserved memory slots (%u) exceed the free"
+                         " memory slots (%u)", reserved, free);
+        return 0;
+    }
+    return free - reserved; /* Cannot wrap around: free >= reserved here. */
+}
+
 /*
  * The memslot soft limit for memory devices. The soft limit might change at
  * runtime in corner cases (that should certainly be avoided), for example, when
@@ -146,7 +180,7 @@  static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
                                         MemoryRegion *mr, Error **errp)
 {
     const uint64_t used_region_size = ms->device_memory->used_region_size;
-    const unsigned int available_memslots = get_free_memslots();
+    const unsigned int available_memslots = get_available_memslots(ms);
     const uint64_t size = memory_region_size(mr);
     unsigned int required_memslots;
 
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 472ccba4ab..b1e2eca55d 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1422,7 +1422,7 @@  int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                    VhostBackendType backend_type, uint32_t busyloop_timeout,
                    Error **errp)
 {
-    unsigned int used;
+    unsigned int used, reserved, limit;
     uint64_t features;
     int i, r, n_initialized_vqs = 0;
 
@@ -1528,9 +1528,19 @@  int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
     } else {
         used = used_memslots;
     }
-    if (used > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
-        error_setg(errp, "vhost backend memory slots limit is less"
-                   " than current number of present memory slots");
+    /*
+     * We simplify by assuming that reserved memslots are compatible with used
+     * vhost devices (if vhost only supports shared memory, the memory devices
+     * better use shared memory) and that reserved memslots are not used for
+     * ROM.
+     */
+    reserved = memory_devices_get_reserved_memslots();
+    limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+    if (used + reserved > limit) {
+        error_setg(errp, "vhost backend memory slots limit (%d) is less"
+                   " than current number of used (%d) and reserved (%d)"
+                   " memory slots. Try adding vhost devices before memory"
+                   " devices.", limit, used, reserved);
         r = -EINVAL;
         goto fail_busyloop;
     }
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index 755f6304c6..7e8e4452cb 100644
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@@ -47,6 +47,12 @@  typedef struct MemoryDeviceState MemoryDeviceState;
  * single RAM/ROM memory region or a memory region container with subregions
  * that are RAM/ROM memory regions or aliases to RAM/ROM memory regions. Other
  * memory regions or subregions are not supported.
+ *
+ * If the device memory region returned via @get_memory_region is a
+ * memory region container, it's supported to dynamically (un)map subregions
+ * as long as the number of memslots returned by @get_memslots() won't
+ * be exceeded and as long as all memory regions are of the same kind (e.g.,
+ * all RAM or all ROM).
  */
 struct MemoryDeviceClass {
     /* private */
@@ -127,6 +133,7 @@  struct MemoryDeviceClass {
 MemoryDeviceInfoList *qmp_memory_device_list(void);
 uint64_t get_plugged_memory_size(void);
 void memory_devices_notify_vhost_device_added(void);
+unsigned int memory_devices_get_reserved_memslots(void);
 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
                             const uint64_t *legacy_align, Error **errp);
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
diff --git a/stubs/qmp_memory_device.c b/stubs/qmp_memory_device.c
index b0e3e34f85..74707ed9fd 100644
--- a/stubs/qmp_memory_device.c
+++ b/stubs/qmp_memory_device.c
@@ -14,3 +14,8 @@  uint64_t get_plugged_memory_size(void)
 void memory_devices_notify_vhost_device_added(void)
 {
 }
+
+unsigned int memory_devices_get_reserved_memslots(void)
+{
+    return 0; /* Stub: always reports zero reserved memslots. */
+}