diff mbox series

[V2,26/45] migration: close kvm after cpr

Message ID 1739542467-226739-27-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New
Headers show
Series Live update: vfio and iommufd | expand

Commit Message

Steven Sistare Feb. 14, 2025, 2:14 p.m. UTC
cpr-transfer breaks vfio network connectivity to and from the guest, and
the host system log shows:
  irq bypass consumer (token 00000000a03c32e5) registration fails: -16
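which is EBUSY.  This occurs because KVM descriptors are still open in
the old QEMU process.  Close them (the per-vCPU, VM, and KVM state
descriptors, plus the vfio KVM device descriptor) from a cpr-transfer
mode migration notifier when MIG_EVENT_PRECOPY_DONE is raised.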

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 accel/kvm/kvm-all.c           | 28 ++++++++++++++++++++++++++++
 hw/vfio/common.c              |  8 ++++++++
 include/hw/vfio/vfio-common.h |  1 +
 include/migration/cpr.h       |  2 ++
 include/system/kvm.h          |  1 +
 migration/cpr-transfer.c      | 18 ++++++++++++++++++
 migration/cpr.c               |  8 ++++++++
 migration/migration.c         |  1 +
 8 files changed, 67 insertions(+)

Comments

Steven Sistare Feb. 14, 2025, 3:51 p.m. UTC | #1
cc kvm reviewers.

The series is here:
   https://lore.kernel.org/qemu-devel/1739542467-226739-1-git-send-email-steven.sistare@oracle.com/

- Steve

On 2/14/2025 9:14 AM, Steve Sistare wrote:
> cpr-transfer breaks vfio network connectivity to and from the guest, and
> the host system log shows:
>    irq bypass consumer (token 00000000a03c32e5) registration fails: -16
> which is EBUSY.  This occurs because KVM descriptors are still open in
> the old QEMU process.  Close them.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>   accel/kvm/kvm-all.c           | 28 ++++++++++++++++++++++++++++
>   hw/vfio/common.c              |  8 ++++++++
>   include/hw/vfio/vfio-common.h |  1 +
>   include/migration/cpr.h       |  2 ++
>   include/system/kvm.h          |  1 +
>   migration/cpr-transfer.c      | 18 ++++++++++++++++++
>   migration/cpr.c               |  8 ++++++++
>   migration/migration.c         |  1 +
>   8 files changed, 67 insertions(+)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index c65b790..cdbe91c 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -507,16 +507,23 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>           goto err;
>       }
>   
> +    /* If I am the CPU that created coalesced_mmio_ring, then discard it */
> +    if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
> +        s->coalesced_mmio_ring = NULL;
> +    }
> +
>       ret = munmap(cpu->kvm_run, mmap_size);
>       if (ret < 0) {
>           goto err;
>       }
> +    cpu->kvm_run = NULL;
>   
>       if (cpu->kvm_dirty_gfns) {
>           ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
>           if (ret < 0) {
>               goto err;
>           }
> +        cpu->kvm_dirty_gfns = NULL;
>       }
>   
>       kvm_park_vcpu(cpu);
> @@ -595,6 +602,27 @@ err:
>       return ret;
>   }
>   
> +void kvm_close(void)
> +{
> +    CPUState *cpu;
> +
> +    CPU_FOREACH(cpu) {
> +        cpu_remove_sync(cpu);
> +        close(cpu->kvm_fd);
> +        cpu->kvm_fd = -1;
> +        close(cpu->kvm_vcpu_stats_fd);
> +        cpu->kvm_vcpu_stats_fd = -1;
> +    }
> +
> +    if (kvm_state && kvm_state->fd != -1) {
> +        close(kvm_state->vmfd);
> +        kvm_state->vmfd = -1;
> +        close(kvm_state->fd);
> +        kvm_state->fd = -1;
> +    }
> +    kvm_state = NULL;
> +}
> +
>   /*
>    * dirty pages logging control
>    */
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 48663ad..c536698 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1501,6 +1501,14 @@ int vfio_kvm_device_del_fd(int fd, Error **errp)
>       return 0;
>   }
>   
> +void vfio_kvm_device_close(void)
> +{
> +    if (vfio_kvm_device_fd != -1) {
> +        close(vfio_kvm_device_fd);
> +        vfio_kvm_device_fd = -1;
> +    }
> +}
> +
>   VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
>   {
>       VFIOAddressSpace *space;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1563f3a..78e4f12 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -259,6 +259,7 @@ VFIODevice *vfio_get_vfio_device(Object *obj);
>   
>   int vfio_kvm_device_add_fd(int fd, Error **errp);
>   int vfio_kvm_device_del_fd(int fd, Error **errp);
> +void vfio_kvm_device_close(void);
>   
>   bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
>   void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
> diff --git a/include/migration/cpr.h b/include/migration/cpr.h
> index 6ad04d4..c5c191d 100644
> --- a/include/migration/cpr.h
> +++ b/include/migration/cpr.h
> @@ -32,7 +32,9 @@ void cpr_state_close(void);
>   struct QIOChannel *cpr_state_ioc(void);
>   
>   bool cpr_needed_for_reuse(void *opaque);
> +void cpr_kvm_close(void);
>   
> +void cpr_transfer_init(void);
>   QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
>   QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
>   
> diff --git a/include/system/kvm.h b/include/system/kvm.h
> index ab17c09..ad5c55e 100644
> --- a/include/system/kvm.h
> +++ b/include/system/kvm.h
> @@ -194,6 +194,7 @@ bool kvm_has_sync_mmu(void);
>   int kvm_has_vcpu_events(void);
>   int kvm_max_nested_state_length(void);
>   int kvm_has_gsi_routing(void);
> +void kvm_close(void);
>   
>   /**
>    * kvm_arm_supports_user_irq
> diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c
> index e1f1403..396558f 100644
> --- a/migration/cpr-transfer.c
> +++ b/migration/cpr-transfer.c
> @@ -17,6 +17,24 @@
>   #include "migration/vmstate.h"
>   #include "trace.h"
>   
> +static int cpr_transfer_notifier(NotifierWithReturn *notifier,
> +                                 MigrationEvent *e,
> +                                 Error **errp)
> +{
> +    if (e->type == MIG_EVENT_PRECOPY_DONE) {
> +        cpr_kvm_close();
> +    }
> +    return 0;
> +}
> +
> +void cpr_transfer_init(void)
> +{
> +    static NotifierWithReturn notifier;
> +
> +    migration_add_notifier_mode(&notifier, cpr_transfer_notifier,
> +                                MIG_MODE_CPR_TRANSFER);
> +}
> +
>   QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp)
>   {
>       MigrationAddress *addr = channel->addr;
> diff --git a/migration/cpr.c b/migration/cpr.c
> index 12c489b..351e12d 100644
> --- a/migration/cpr.c
> +++ b/migration/cpr.c
> @@ -7,12 +7,14 @@
>   
>   #include "qemu/osdep.h"
>   #include "qapi/error.h"
> +#include "hw/vfio/vfio-common.h"
>   #include "migration/cpr.h"
>   #include "migration/misc.h"
>   #include "migration/options.h"
>   #include "migration/qemu-file.h"
>   #include "migration/savevm.h"
>   #include "migration/vmstate.h"
> +#include "system/kvm.h"
>   #include "system/runstate.h"
>   #include "trace.h"
>   
> @@ -266,3 +268,9 @@ bool cpr_needed_for_reuse(void *opaque)
>       MigMode mode = migrate_mode();
>       return mode == MIG_MODE_CPR_TRANSFER;
>   }
> +
> +void cpr_kvm_close(void)
> +{
> +    kvm_close();
> +    vfio_kvm_device_close();
> +}
> diff --git a/migration/migration.c b/migration/migration.c
> index 3969285..bdc5255 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -296,6 +296,7 @@ void migration_object_init(void)
>   
>       ram_mig_init();
>       dirty_bitmap_mig_init();
> +    cpr_transfer_init();
>   
>       /* Initialize cpu throttle timers */
>       cpu_throttle_init();
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index c65b790..cdbe91c 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -507,16 +507,23 @@  static int do_kvm_destroy_vcpu(CPUState *cpu)
         goto err;
     }
 
+    /* Clear coalesced_mmio_ring if it points into this CPU's kvm_run */
+    if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+        s->coalesced_mmio_ring = NULL;
+    }
+
     ret = munmap(cpu->kvm_run, mmap_size);
     if (ret < 0) {
         goto err;
     }
+    cpu->kvm_run = NULL;
 
     if (cpu->kvm_dirty_gfns) {
         ret = munmap(cpu->kvm_dirty_gfns, s->kvm_dirty_ring_bytes);
         if (ret < 0) {
             goto err;
         }
+        cpu->kvm_dirty_gfns = NULL;
     }
 
     kvm_park_vcpu(cpu);
@@ -595,6 +602,27 @@  err:
     return ret;
 }
 
+void kvm_close(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_remove_sync(cpu);
+        close(cpu->kvm_fd);
+        cpu->kvm_fd = -1;
+        close(cpu->kvm_vcpu_stats_fd);
+        cpu->kvm_vcpu_stats_fd = -1;
+    }
+
+    if (kvm_state && kvm_state->fd != -1) {
+        close(kvm_state->vmfd);
+        kvm_state->vmfd = -1;
+        close(kvm_state->fd);
+        kvm_state->fd = -1;
+    }
+    kvm_state = NULL;
+}
+
 /*
  * dirty pages logging control
  */
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 48663ad..c536698 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1501,6 +1501,14 @@  int vfio_kvm_device_del_fd(int fd, Error **errp)
     return 0;
 }
 
+void vfio_kvm_device_close(void)
+{
+    if (vfio_kvm_device_fd != -1) {
+        close(vfio_kvm_device_fd);
+        vfio_kvm_device_fd = -1;
+    }
+}
+
 VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
 {
     VFIOAddressSpace *space;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 1563f3a..78e4f12 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -259,6 +259,7 @@  VFIODevice *vfio_get_vfio_device(Object *obj);
 
 int vfio_kvm_device_add_fd(int fd, Error **errp);
 int vfio_kvm_device_del_fd(int fd, Error **errp);
+void vfio_kvm_device_close(void);
 
 bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
 void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
index 6ad04d4..c5c191d 100644
--- a/include/migration/cpr.h
+++ b/include/migration/cpr.h
@@ -32,7 +32,9 @@  void cpr_state_close(void);
 struct QIOChannel *cpr_state_ioc(void);
 
 bool cpr_needed_for_reuse(void *opaque);
+void cpr_kvm_close(void);
 
+void cpr_transfer_init(void);
 QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
 QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
 
diff --git a/include/system/kvm.h b/include/system/kvm.h
index ab17c09..ad5c55e 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -194,6 +194,7 @@  bool kvm_has_sync_mmu(void);
 int kvm_has_vcpu_events(void);
 int kvm_max_nested_state_length(void);
 int kvm_has_gsi_routing(void);
+void kvm_close(void);
 
 /**
  * kvm_arm_supports_user_irq
diff --git a/migration/cpr-transfer.c b/migration/cpr-transfer.c
index e1f1403..396558f 100644
--- a/migration/cpr-transfer.c
+++ b/migration/cpr-transfer.c
@@ -17,6 +17,24 @@ 
 #include "migration/vmstate.h"
 #include "trace.h"
 
+static int cpr_transfer_notifier(NotifierWithReturn *notifier,
+                                 MigrationEvent *e,
+                                 Error **errp)
+{
+    if (e->type == MIG_EVENT_PRECOPY_DONE) {
+        cpr_kvm_close();
+    }
+    return 0;
+}
+
+void cpr_transfer_init(void)
+{
+    static NotifierWithReturn notifier;
+
+    migration_add_notifier_mode(&notifier, cpr_transfer_notifier,
+                                MIG_MODE_CPR_TRANSFER);
+}
+
 QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp)
 {
     MigrationAddress *addr = channel->addr;
diff --git a/migration/cpr.c b/migration/cpr.c
index 12c489b..351e12d 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -7,12 +7,14 @@ 
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "hw/vfio/vfio-common.h"
 #include "migration/cpr.h"
 #include "migration/misc.h"
 #include "migration/options.h"
 #include "migration/qemu-file.h"
 #include "migration/savevm.h"
 #include "migration/vmstate.h"
+#include "system/kvm.h"
 #include "system/runstate.h"
 #include "trace.h"
 
@@ -266,3 +268,9 @@  bool cpr_needed_for_reuse(void *opaque)
     MigMode mode = migrate_mode();
     return mode == MIG_MODE_CPR_TRANSFER;
 }
+
+void cpr_kvm_close(void)
+{
+    kvm_close();
+    vfio_kvm_device_close();
+}
diff --git a/migration/migration.c b/migration/migration.c
index 3969285..bdc5255 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -296,6 +296,7 @@  void migration_object_init(void)
 
     ram_mig_init();
     dirty_bitmap_mig_init();
+    cpr_transfer_init();
 
     /* Initialize cpu throttle timers */
     cpu_throttle_init();