diff mbox series

balloon config change seems to break live migration from 3.0.1 to 4.0

Message ID 20190627131252.GA14795@olga.proxmox.com (mailing list archive)
State New, archived
Headers show
Series balloon config change seems to break live migration from 3.0.1 to 4.0 | expand

Commit Message

Wolfgang Bumiller June 27, 2019, 1:12 p.m. UTC
While testing with 4.0 we've run into issues with live migration from
3.0.1 to 4.0 when a balloon device was involved.

We'd see the following error on the destination:
  qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10 read: a1 device: 1 cmask: ff wmask: c0 w1cmask:0 
  qemu-system-x86_64: Failed to load PCIDevice:config 
  qemu-system-x86_64: Failed to load virtio-balloon:virtio 
  qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-balloon' 
  qemu-system-x86_64: load of migration failed: Invalid argument

After looking through the commits I noticed that the pci config sent for
the balloon device comes from
include/standard-headers/linux/virtio_balloon.h and changed size between
3.1 and 4.0.
As a "guess" I tried reverting that change (commented out the two last
fields (and access to it in hw/virtio/virtio-balloon.c's
virtio_balloon_get_config()), and then the migration seems to go through
successfully.

I've since also rebuilt qemu without our patches (tags v3.0.1 and v4.0.0)
and also tried with master (since dgilbert mentioned on irc remembering
the issue and that there may have been a fix around), but got the same
result.

Posting here now as dgilbert requested on irc.

Here are the commands used to start qemu:
  Source:
    /usr/bin/kvm \
      -name randomclone \
      -chardev 'socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait' \
      -mon 'chardev=qmp,mode=control' \
      -chardev 'socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5' \
      -mon 'chardev=qmp-event,mode=control' \
      -pidfile /var/run/qemu-server/101.pid \
      -daemonize \
      -smbios 'type=1,uuid=f3ab31f6-ca7d-469c-bf51-547fd9bbd2d9' \
      -smp '4,sockets=1,cores=4,maxcpus=4' \
      -nodefaults \
      -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' \
      -vnc unix:/var/run/qemu-server/101.vnc,password \
      -cpu host,+pcid,+spec-ctrl,+ssbd,+pdpe1gb,+kvm_pv_unhalt,+kvm_pv_eoi \
      -m 4096 \
      -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' \
      -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' \
      -device 'vmgenid,guid=fb282779-7056-4f1d-96bb-70f578294e45' \
      -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' \
      -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' \
      -device 'VGA,id=vga,bus=pci.0,addr=0x2' \
      -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' \
      -iscsi 'initiator-name=iqn.1993-08.org.debian:01:856d32b504d' \
      -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' \
      -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200' \
      -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' \
      -drive 'file=rbd:rbd/vm-101-disk-0:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd.keyring,if=none,id=drive-scsi0,discard=on,format=raw,cache=none,aio=native,detect-zeroes=unmap' \
      -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,rotation_rate=1,bootindex=100' \
      -netdev 'type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown,vhost=on' \
      -device 'virtio-net-pci,mac=4E:5D:50:75:4D:ED,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' \
      -machine 'type=pc' \
      -enable-kvm

  Destination:
    /usr/bin/kvm \
      -name randomclone \
      -chardev socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait \
      -mon chardev=qmp,mode=control \
      -chardev socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5 \
      -mon chardev=qmp-event,mode=control \
      -pidfile /var/run/qemu-server/101.pid \
      -smbios type=1,uuid=f3ab31f6-ca7d-469c-bf51-547fd9bbd2d9 \
      -smp 4,sockets=1,cores=4,maxcpus=4 \
      -nodefaults \
      -boot menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg \
      -vnc unix:/var/run/qemu-server/101.vnc,password \
      -cpu host,+pcid,+spec-ctrl,+ssbd,+pdpe1gb,+kvm_pv_unhalt,+kvm_pv_eoi \
      -m 4096 \
      -device pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e \
      -device pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f \
      -device vmgenid,guid=fb282779-7056-4f1d-96bb-70f578294e45 \
      -device piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2 \
      -device usb-tablet,id=tablet,bus=uhci.0,port=1 \
      -device VGA,id=vga,bus=pci.0,addr=0x2 \
      -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 \
      -iscsi initiator-name=iqn.1993-08.org.debian:01:ee4e4a566b \
      -drive if=none,id=drive-ide2,media=cdrom,aio=threads \
      -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200 \
      -device virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5 \
      -drive file=rbd:rbd/vm-101-disk-0:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd.keyring,if=none,id=drive-scsi0,discard=on,format=raw,cache=none,aio=native,detect-zeroes=unmap \
      -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,rotation_rate=1,bootindex=100 \
      -netdev type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown,vhost=on \
      -device virtio-net-pci,mac=4E:5D:50:75:4D:ED,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300 \
      -machine type=pc-i440fx-3.0 \
      -enable-kvm \
      -incoming tcp:10.9.2.106:9989 \
      -S

This is the exact test-change I made which seems to work around it, but
a proper fix would be nicer. Not sure how, though.

---8<---

Comments

no-reply@patchew.org June 27, 2019, 6:40 p.m. UTC | #1
Patchew URL: https://patchew.org/QEMU/20190627131252.GA14795@olga.proxmox.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20190627131252.GA14795@olga.proxmox.com
Subject: [Qemu-devel] balloon config change seems to break live migration from 3.0.1 to 4.0

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

From https://github.com/patchew-project/qemu
 * [new tag]         patchew/20190627131252.GA14795@olga.proxmox.com -> patchew/20190627131252.GA14795@olga.proxmox.com
Switched to a new branch 'test'
16eeaca balloon config change seems to break live migration from 3.0.1 to 4.0

=== OUTPUT BEGIN ===
ERROR: do not use C99 // comments
#127: FILE: hw/virtio/virtio-balloon.c:626:
+    //if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {

ERROR: do not use C99 // comments
#128: FILE: hw/virtio/virtio-balloon.c:627:
+    //    config.free_page_report_cmd_id =

ERROR: do not use C99 // comments
#129: FILE: hw/virtio/virtio-balloon.c:628:
+    //                   cpu_to_le32(dev->free_page_report_cmd_id);

ERROR: do not use C99 // comments
#130: FILE: hw/virtio/virtio-balloon.c:629:
+    //} else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {

ERROR: do not use C99 // comments
#131: FILE: hw/virtio/virtio-balloon.c:630:
+    //    config.free_page_report_cmd_id =

ERROR: do not use C99 // comments
#132: FILE: hw/virtio/virtio-balloon.c:631:
+    //                   cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);

ERROR: do not use C99 // comments
#133: FILE: hw/virtio/virtio-balloon.c:632:
+    //} else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {

ERROR: do not use C99 // comments
#134: FILE: hw/virtio/virtio-balloon.c:633:
+    //    config.free_page_report_cmd_id =

ERROR: do not use C99 // comments
#135: FILE: hw/virtio/virtio-balloon.c:634:
+    //                   cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);

ERROR: do not use C99 // comments
#136: FILE: hw/virtio/virtio-balloon.c:635:
+    //}

ERROR: Missing Signed-off-by: line(s)

total: 11 errors, 0 warnings, 38 lines checked

Commit 16eeaca78a15 (balloon config change seems to break live migration from 3.0.1 to 4.0) has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20190627131252.GA14795@olga.proxmox.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com
Wolfgang Bumiller July 2, 2019, 10:21 a.m. UTC | #2
On Thu, Jun 27, 2019 at 03:12:52PM +0200, Wolfgang Bumiller wrote:
> While testing with 4.0 we've run into issues with live migration from
> 3.0.1 to 4.0 when a balloon device was involved.
> 
> We'd see the following error on the destination:
>   qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10 read: a1 device: 1 cmask: ff wmask: c0 w1cmask:0 
>   qemu-system-x86_64: Failed to load PCIDevice:config 
>   qemu-system-x86_64: Failed to load virtio-balloon:virtio 
>   qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-balloon' 
>   qemu-system-x86_64: load of migration failed: Invalid argument

So if I'm not mistaken this seems to cover the BAR containing the mapped
address, which now has a stricter alignment requirement (since the
config space crossed the 32 byte boundary and now is now 64 bytes and
therefor requires a 64 byte alignment).
(This in turn means that it only affects guests where the balloon's
mapping isn't already 64 byte aligned.)

I tested with the changes below meant to use the old configuration size
for machines depending on their machine version... This seems to fix the
problem for me here.

With this changing compatibility options I'm not sure if this is a
desired upstream change since 4.0 is already released?

---8<---
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller <w.bumiller@proxmox.com>
Date: Tue, 2 Jul 2019 09:23:43 +0200
Subject: [PATCH] virtio-balloon: use smaller config on older guests

The increased config size changes its alignment requirements
preventing guests from migrating from old qemu versions if
their balloon mapping isn't stricter aligned.
So base the default config size on the machine version.

Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
 hw/i386/pc.c                       |  2 ++
 hw/virtio/virtio-balloon.c         | 52 +++++++++++++++++++++++++++---
 include/hw/virtio/virtio-balloon.h |  1 +
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 652eb72b2b..3676187a19 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -77,6 +77,7 @@
 #include "hw/i386/intel_iommu.h"
 #include "hw/net/ne2000-isa.h"
 #include "standard-headers/asm-x86/bootparam.h"
+#include "hw/virtio/virtio-balloon.h"
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -137,6 +138,7 @@ GlobalProperty pc_compat_3_1[] = {
     { "Icelake-Server" "-" TYPE_X86_CPU,      "mpx", "on" },
     { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" },
     { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" },
+    { TYPE_VIRTIO_BALLOON, "x-use-large-balloon-config", "off" },
 };
 const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1);
 
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d96e4aa96f..cd0f124fbb 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -328,6 +328,28 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
     balloon_stats_change_timer(s, 0);
 }
 
+static void balloon_get_large_config(Object *obj, Visitor *v,
+                                     const char *name, void *opaque,
+                                     Error **errp)
+{
+    VirtIOBalloon *s = opaque;
+    visit_type_bool(v, name, &s->use_large_config, errp);
+}
+
+static void balloon_set_large_config(Object *obj, Visitor *v,
+                                     const char *name, void *opaque,
+                                     Error **errp)
+{
+    VirtIOBalloon *s = opaque;
+    DeviceState *dev = DEVICE(s);
+
+    if (dev->realized) {
+        error_setg(errp, "balloon config size cannot be changed at runtime");
+    } else {
+        visit_type_bool(v, name, &s->use_large_config, errp);
+    }
+}
+
 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -526,6 +548,17 @@ static bool virtio_balloon_free_page_support(void *opaque)
     return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
 }
 
+static size_t virtio_balloon_config_size(void *opaque)
+{
+    VirtIOBalloon *s = opaque;
+
+    if (s->use_large_config) {
+        return sizeof(struct virtio_balloon_config);
+    } else {
+        return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
+    }
+}
+
 static void virtio_balloon_free_page_start(VirtIOBalloon *s)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
@@ -635,7 +668,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     }
 
     trace_virtio_balloon_get_config(config.num_pages, config.actual);
-    memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
+    memcpy(config_data, &config, virtio_balloon_config_size(dev));
 }
 
 static int build_dimm_list(Object *obj, void *opaque)
@@ -679,7 +712,7 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
     uint32_t oldactual = dev->actual;
     ram_addr_t vm_ram_size = get_current_ram_size();
 
-    memcpy(&config, config_data, sizeof(struct virtio_balloon_config));
+    memcpy(&config, config_data, virtio_balloon_config_size(dev));
     dev->actual = le32_to_cpu(config.actual);
     if (dev->actual != oldactual) {
         qapi_event_send_balloon_change(vm_ram_size -
@@ -795,7 +828,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
     int ret;
 
     virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
-                sizeof(struct virtio_balloon_config));
+                virtio_balloon_config_size(s));
 
     ret = qemu_add_balloon_handler(virtio_balloon_to_target,
                                    virtio_balloon_stat, s);
@@ -820,7 +853,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
         s->free_page_report_notify.notify =
                                        virtio_balloon_free_page_report_notify;
         precopy_add_notifier(&s->free_page_report_notify);
-        if (s->iothread) {
+        if (s->iothread && s->use_large_config) {
             object_ref(OBJECT(s->iothread));
             s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
                                        virtio_ballloon_get_free_page_hints, s);
@@ -833,6 +866,11 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
             virtio_error(vdev, "iothread is missing");
         }
     }
+
+    if (!s->use_large_config) {
+        s->host_features &= ~(1 << VIRTIO_BALLOON_F_PAGE_POISON);
+    }
+
     reset_stats(s);
 }
 
@@ -909,6 +947,12 @@ static void virtio_balloon_instance_init(Object *obj)
                         balloon_stats_get_poll_interval,
                         balloon_stats_set_poll_interval,
                         NULL, s, NULL);
+
+    s->use_large_config = true;
+    object_property_add(obj, "x-use-large-balloon-config", "bool",
+                        balloon_get_large_config,
+                        balloon_set_large_config,
+                        NULL, s, NULL);
 }
 
 static const VMStateDescription vmstate_virtio_balloon = {
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 1afafb12f6..f212c08bd7 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -45,6 +45,7 @@ enum virtio_balloon_free_page_report_status {
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
     VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    bool use_large_config;
     uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
Dr. David Alan Gilbert July 9, 2019, 11:05 a.m. UTC | #3
* Wolfgang Bumiller (w.bumiller@proxmox.com) wrote:
> While testing with 4.0 we've run into issues with live migration from
> 3.0.1 to 4.0 when a balloon device was involved.
> 
> We'd see the following error on the destination:
>   qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10 read: a1 device: 1 cmask: ff wmask: c0 w1cmask:0 
>   qemu-system-x86_64: Failed to load PCIDevice:config 
>   qemu-system-x86_64: Failed to load virtio-balloon:virtio 
>   qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-balloon' 
>   qemu-system-x86_64: load of migration failed: Invalid argument
> 
> After looking through the commits I noticed that the pci config sent for
> the balloon device comes from
> include/standard-headers/linux/virtio_balloon.h and changed size between
> 3.1 and 4.0.
> As a "guess" I tried reverting that change (commented out the two last
> fields (and access to it in hw/virtio/virtio-balloon.c's
> virtio_balloon_get_config()), and then the migration seems to go through
> successfully.
> 
> I've since also rebuilt qemu without our patches (tags v3.0.1 and v4.0.0)
> and also tried with master (since dgilbert mentioned on irc remembering
> the issue and that there may have been a fix around), but got the same
> result.
> 
> Posting here now as dgilbert requested on irc.

Apologies for the delay, I was out last week.

Thanks for the command lines; I can confirm I can recreate it here using
a simplified version of your command line.

Dave

> Here are the commands used to start qemu:
>   Source:
>     /usr/bin/kvm \
>       -name randomclone \
>       -chardev 'socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait' \
>       -mon 'chardev=qmp,mode=control' \
>       -chardev 'socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5' \
>       -mon 'chardev=qmp-event,mode=control' \
>       -pidfile /var/run/qemu-server/101.pid \
>       -daemonize \
>       -smbios 'type=1,uuid=f3ab31f6-ca7d-469c-bf51-547fd9bbd2d9' \
>       -smp '4,sockets=1,cores=4,maxcpus=4' \
>       -nodefaults \
>       -boot 'menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg' \
>       -vnc unix:/var/run/qemu-server/101.vnc,password \
>       -cpu host,+pcid,+spec-ctrl,+ssbd,+pdpe1gb,+kvm_pv_unhalt,+kvm_pv_eoi \
>       -m 4096 \
>       -device 'pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f' \
>       -device 'pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e' \
>       -device 'vmgenid,guid=fb282779-7056-4f1d-96bb-70f578294e45' \
>       -device 'piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2' \
>       -device 'usb-tablet,id=tablet,bus=uhci.0,port=1' \
>       -device 'VGA,id=vga,bus=pci.0,addr=0x2' \
>       -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3' \
>       -iscsi 'initiator-name=iqn.1993-08.org.debian:01:856d32b504d' \
>       -drive 'if=none,id=drive-ide2,media=cdrom,aio=threads' \
>       -device 'ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200' \
>       -device 'virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5' \
>       -drive 'file=rbd:rbd/vm-101-disk-0:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd.keyring,if=none,id=drive-scsi0,discard=on,format=raw,cache=none,aio=native,detect-zeroes=unmap' \
>       -device 'scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,rotation_rate=1,bootindex=100' \
>       -netdev 'type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown,vhost=on' \
>       -device 'virtio-net-pci,mac=4E:5D:50:75:4D:ED,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300' \
>       -machine 'type=pc' \
>       -enable-kvm
> 
>   Destination:
>     /usr/bin/kvm \
>       -name randomclone \
>       -chardev socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait \
>       -mon chardev=qmp,mode=control \
>       -chardev socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5 \
>       -mon chardev=qmp-event,mode=control \
>       -pidfile /var/run/qemu-server/101.pid \
>       -smbios type=1,uuid=f3ab31f6-ca7d-469c-bf51-547fd9bbd2d9 \
>       -smp 4,sockets=1,cores=4,maxcpus=4 \
>       -nodefaults \
>       -boot menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg \
>       -vnc unix:/var/run/qemu-server/101.vnc,password \
>       -cpu host,+pcid,+spec-ctrl,+ssbd,+pdpe1gb,+kvm_pv_unhalt,+kvm_pv_eoi \
>       -m 4096 \
>       -device pci-bridge,id=pci.1,chassis_nr=1,bus=pci.0,addr=0x1e \
>       -device pci-bridge,id=pci.2,chassis_nr=2,bus=pci.0,addr=0x1f \
>       -device vmgenid,guid=fb282779-7056-4f1d-96bb-70f578294e45 \
>       -device piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2 \
>       -device usb-tablet,id=tablet,bus=uhci.0,port=1 \
>       -device VGA,id=vga,bus=pci.0,addr=0x2 \
>       -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 \
>       -iscsi initiator-name=iqn.1993-08.org.debian:01:ee4e4a566b \
>       -drive if=none,id=drive-ide2,media=cdrom,aio=threads \
>       -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200 \
>       -device virtio-scsi-pci,id=scsihw0,bus=pci.0,addr=0x5 \
>       -drive file=rbd:rbd/vm-101-disk-0:conf=/etc/pve/ceph.conf:id=admin:keyring=/etc/pve/priv/ceph/rbd.keyring,if=none,id=drive-scsi0,discard=on,format=raw,cache=none,aio=native,detect-zeroes=unmap \
>       -device scsi-hd,bus=scsihw0.0,channel=0,scsi-id=0,lun=0,drive=drive-scsi0,id=scsi0,rotation_rate=1,bootindex=100 \
>       -netdev type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown,vhost=on \
>       -device virtio-net-pci,mac=4E:5D:50:75:4D:ED,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300 \
>       -machine type=pc-i440fx-3.0 \
>       -enable-kvm \
>       -incoming tcp:10.9.2.106:9989 \
>       -S
> 
> This is the exact test-change I made which seems to work around it, but
> a proper fix would be nicer. Not sure how, though.
> 
> ---8<---
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index d96e4aa96f..8d631d67a8 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -623,16 +623,16 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>      config.num_pages = cpu_to_le32(dev->num_pages);
>      config.actual = cpu_to_le32(dev->actual);
>  
> -    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
> -        config.free_page_report_cmd_id =
> -                       cpu_to_le32(dev->free_page_report_cmd_id);
> -    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
> -        config.free_page_report_cmd_id =
> -                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
> -    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
> -        config.free_page_report_cmd_id =
> -                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
> -    }
> +    //if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
> +    //    config.free_page_report_cmd_id =
> +    //                   cpu_to_le32(dev->free_page_report_cmd_id);
> +    //} else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
> +    //    config.free_page_report_cmd_id =
> +    //                   cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
> +    //} else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
> +    //    config.free_page_report_cmd_id =
> +    //                   cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
> +    //}
>  
>      trace_virtio_balloon_get_config(config.num_pages, config.actual);
>      memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
> diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
> index 9375ca2a70..86aca75972 100644
> --- a/include/standard-headers/linux/virtio_balloon.h
> +++ b/include/standard-headers/linux/virtio_balloon.h
> @@ -48,9 +48,9 @@ struct virtio_balloon_config {
>  	/* Number of pages we've actually got in balloon. */
>  	uint32_t actual;
>  	/* Free page report command id, readonly by guest */
> -	uint32_t free_page_report_cmd_id;
> -	/* Stores PAGE_POISON if page poisoning is in use */
> -	uint32_t poison_val;
> +	//uint32_t free_page_report_cmd_id;
> +	///* Stores PAGE_POISON if page poisoning is in use */
> +	//uint32_t poison_val;
>  };
>  
>  #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert July 9, 2019, 2:22 p.m. UTC | #4
* Wolfgang Bumiller (w.bumiller@proxmox.com) wrote:
> On Thu, Jun 27, 2019 at 03:12:52PM +0200, Wolfgang Bumiller wrote:
> > While testing with 4.0 we've run into issues with live migration from
> > 3.0.1 to 4.0 when a balloon device was involved.
> > 
> > We'd see the following error on the destination:
> >   qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10 read: a1 device: 1 cmask: ff wmask: c0 w1cmask:0 
> >   qemu-system-x86_64: Failed to load PCIDevice:config 
> >   qemu-system-x86_64: Failed to load virtio-balloon:virtio 
> >   qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:03.0/virtio-balloon' 
> >   qemu-system-x86_64: load of migration failed: Invalid argument
> 
> So if I'm not mistaken this seems to cover the BAR containing the mapped
> address, which now has a stricter alignment requirement (since the
> config space crossed the 32 byte boundary and now is now 64 bytes and
> therefor requires a 64 byte alignment).
> (This in turn means that it only affects guests where the balloon's
> mapping isn't already 64 byte aligned.)

Right, I agree; thanks for figuring this out.

> I tested with the changes below meant to use the old configuration size
> for machines depending on their machine version... This seems to fix the
> problem for me here.

OK, can you post this as a separate mail please; cc'ing all who I've
added on this list.

> With this changing compatibility options I'm not sure if this is a
> desired upstream change since 4.0 is already released?

There's no great answer here - one of them is going to stay broken;
we can either fix migration earlier 4.0 and break migration to 4.0
or leave migration from earlier broken.
Let's add the  property at least - downstream I know I'll need
it.

As for your actual patch; it's way bigger than I expected - can't you
just use DEFINE_PROP - like for exmaple the 'disable-modern' in
virtio/virtio-pci.c ?

Stefan/mst: What do you reckon - should we:
   a) Fix it so migration with older qemu's works but break 4.0
   b) Or leave 4.0 working and keep older broken.

Dave

> ---8<---
> From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
> From: Wolfgang Bumiller <w.bumiller@proxmox.com>
> Date: Tue, 2 Jul 2019 09:23:43 +0200
> Subject: [PATCH] virtio-balloon: use smaller config on older guests
> 
> The increased config size changes its alignment requirements
> preventing guests from migrating from old qemu versions if
> their balloon mapping isn't stricter aligned.
> So base the default config size on the machine version.
> 
> Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
> ---
>  hw/i386/pc.c                       |  2 ++
>  hw/virtio/virtio-balloon.c         | 52 +++++++++++++++++++++++++++---
>  include/hw/virtio/virtio-balloon.h |  1 +
>  3 files changed, 51 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 652eb72b2b..3676187a19 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -77,6 +77,7 @@
>  #include "hw/i386/intel_iommu.h"
>  #include "hw/net/ne2000-isa.h"
>  #include "standard-headers/asm-x86/bootparam.h"
> +#include "hw/virtio/virtio-balloon.h"
>  
>  /* debug PC/ISA interrupts */
>  //#define DEBUG_IRQ
> @@ -137,6 +138,7 @@ GlobalProperty pc_compat_3_1[] = {
>      { "Icelake-Server" "-" TYPE_X86_CPU,      "mpx", "on" },
>      { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" },
>      { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" },
> +    { TYPE_VIRTIO_BALLOON, "x-use-large-balloon-config", "off" },
>  };
>  const size_t pc_compat_3_1_len = G_N_ELEMENTS(pc_compat_3_1);
>  
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index d96e4aa96f..cd0f124fbb 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -328,6 +328,28 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
>      balloon_stats_change_timer(s, 0);
>  }
>  
> +static void balloon_get_large_config(Object *obj, Visitor *v,
> +                                     const char *name, void *opaque,
> +                                     Error **errp)
> +{
> +    VirtIOBalloon *s = opaque;
> +    visit_type_bool(v, name, &s->use_large_config, errp);
> +}
> +
> +static void balloon_set_large_config(Object *obj, Visitor *v,
> +                                     const char *name, void *opaque,
> +                                     Error **errp)
> +{
> +    VirtIOBalloon *s = opaque;
> +    DeviceState *dev = DEVICE(s);
> +
> +    if (dev->realized) {
> +        error_setg(errp, "balloon config size cannot be changed at runtime");
> +    } else {
> +        visit_type_bool(v, name, &s->use_large_config, errp);
> +    }
> +}
> +
>  static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> @@ -526,6 +548,17 @@ static bool virtio_balloon_free_page_support(void *opaque)
>      return virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT);
>  }
>  
> +static size_t virtio_balloon_config_size(void *opaque)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    if (s->use_large_config) {
> +        return sizeof(struct virtio_balloon_config);
> +    } else {
> +        return offsetof(struct virtio_balloon_config, free_page_report_cmd_id);
> +    }
> +}
> +
>  static void virtio_balloon_free_page_start(VirtIOBalloon *s)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(s);
> @@ -635,7 +668,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>      }
>  
>      trace_virtio_balloon_get_config(config.num_pages, config.actual);
> -    memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
> +    memcpy(config_data, &config, virtio_balloon_config_size(dev));
>  }
>  
>  static int build_dimm_list(Object *obj, void *opaque)
> @@ -679,7 +712,7 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
>      uint32_t oldactual = dev->actual;
>      ram_addr_t vm_ram_size = get_current_ram_size();
>  
> -    memcpy(&config, config_data, sizeof(struct virtio_balloon_config));
> +    memcpy(&config, config_data, virtio_balloon_config_size(dev));
>      dev->actual = le32_to_cpu(config.actual);
>      if (dev->actual != oldactual) {
>          qapi_event_send_balloon_change(vm_ram_size -
> @@ -795,7 +828,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>      int ret;
>  
>      virtio_init(vdev, "virtio-balloon", VIRTIO_ID_BALLOON,
> -                sizeof(struct virtio_balloon_config));
> +                virtio_balloon_config_size(s));
>  
>      ret = qemu_add_balloon_handler(virtio_balloon_to_target,
>                                     virtio_balloon_stat, s);
> @@ -820,7 +853,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>          s->free_page_report_notify.notify =
>                                         virtio_balloon_free_page_report_notify;
>          precopy_add_notifier(&s->free_page_report_notify);
> -        if (s->iothread) {
> +        if (s->iothread && s->use_large_config) {
>              object_ref(OBJECT(s->iothread));
>              s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread),
>                                         virtio_ballloon_get_free_page_hints, s);
> @@ -833,6 +866,11 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>              virtio_error(vdev, "iothread is missing");
>          }
>      }
> +
> +    if (!s->use_large_config) {
> +        s->host_features &= ~(1 << VIRTIO_BALLOON_F_PAGE_POISON);
> +    }
> +
>      reset_stats(s);
>  }
>  
> @@ -909,6 +947,12 @@ static void virtio_balloon_instance_init(Object *obj)
>                          balloon_stats_get_poll_interval,
>                          balloon_stats_set_poll_interval,
>                          NULL, s, NULL);
> +
> +    s->use_large_config = true;
> +    object_property_add(obj, "x-use-large-balloon-config", "bool",
> +                        balloon_get_large_config,
> +                        balloon_set_large_config,
> +                        NULL, s, NULL);
>  }
>  
>  static const VMStateDescription vmstate_virtio_balloon = {
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 1afafb12f6..f212c08bd7 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -45,6 +45,7 @@ enum virtio_balloon_free_page_report_status {
>  typedef struct VirtIOBalloon {
>      VirtIODevice parent_obj;
>      VirtQueue *ivq, *dvq, *svq, *free_page_vq;
> +    bool use_large_config;
>      uint32_t free_page_report_status;
>      uint32_t num_pages;
>      uint32_t actual;
> -- 
> 2.20.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Stefan Hajnoczi July 10, 2019, 2:24 p.m. UTC | #5
On Tue, Jul 9, 2019 at 4:23 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> * Wolfgang Bumiller (w.bumiller@proxmox.com) wrote:
> > On Thu, Jun 27, 2019 at 03:12:52PM +0200, Wolfgang Bumiller wrote:
> > With this changing compatibility options I'm not sure if this is a
> > desired upstream change since 4.0 is already released?
>
> There's no great answer here - one of them is going to stay broken;
> we can either fix migration earlier 4.0 and break migration to 4.0
> or leave migration from earlier broken.
> Let's add the  property at least - downstream I know I'll need
> it.
>
> As for your actual patch; it's way bigger than I expected - can't you
> just use DEFINE_PROP - like for exmaple the 'disable-modern' in
> virtio/virtio-pci.c ?
>
> Stefan/mst: What do you reckon - should we:
>    a) Fix it so migration with older qemu's works but break 4.0
>    b) Or leave 4.0 working and keep older broken.

I suggest we keep the 4.0 machine type as it is (with the larger
config size) but fix older machine types so they use the smaller
config size.

QEMU 4.1 and newer machine types should use a config size that depends
on the virtio-balloon features enabled.

With this approach 3.1 -> 4.1 and 4.0 -> 4.1 migration works.
However, migrating from QEMU 4.0 to 4.1 with a machine type earlier
than 4.0 will fail.  I think this is okay because 3.1 -> 4.0 already
failed in this case.

Please see the patch I have sent separately and let me know if you like it.

Stefan
diff mbox series

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d96e4aa96f..8d631d67a8 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -623,16 +623,16 @@  static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
 
-    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
-        config.free_page_report_cmd_id =
-                       cpu_to_le32(dev->free_page_report_cmd_id);
-    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
-        config.free_page_report_cmd_id =
-                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
-    } else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
-        config.free_page_report_cmd_id =
-                       cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
-    }
+    //if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) {
+    //    config.free_page_report_cmd_id =
+    //                   cpu_to_le32(dev->free_page_report_cmd_id);
+    //} else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_STOP) {
+    //    config.free_page_report_cmd_id =
+    //                   cpu_to_le32(VIRTIO_BALLOON_CMD_ID_STOP);
+    //} else if (dev->free_page_report_status == FREE_PAGE_REPORT_S_DONE) {
+    //    config.free_page_report_cmd_id =
+    //                   cpu_to_le32(VIRTIO_BALLOON_CMD_ID_DONE);
+    //}
 
     trace_virtio_balloon_get_config(config.num_pages, config.actual);
     memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 9375ca2a70..86aca75972 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -48,9 +48,9 @@  struct virtio_balloon_config {
 	/* Number of pages we've actually got in balloon. */
 	uint32_t actual;
 	/* Free page report command id, readonly by guest */
-	uint32_t free_page_report_cmd_id;
-	/* Stores PAGE_POISON if page poisoning is in use */
-	uint32_t poison_val;
+	//uint32_t free_page_report_cmd_id;
+	///* Stores PAGE_POISON if page poisoning is in use */
+	//uint32_t poison_val;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */