diff mbox series

[05/33] migration: push Error **errp into qemu_loadvm_state_main()

Message ID 20210204171907.901471-6-berrange@redhat.com (mailing list archive)
State New, archived
Headers show
Series migration: capture error reports into Error object | expand

Commit Message

Daniel P. Berrangé Feb. 4, 2021, 5:18 p.m. UTC
This is an incremental step in converting the vmstate loading code to report
errors via Error objects instead of printing directly to the console/monitor.

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 migration/colo.c   |  3 +-
 migration/savevm.c | 73 +++++++++++++++++++++++++++++++---------------
 migration/savevm.h |  3 +-
 3 files changed, 52 insertions(+), 27 deletions(-)

Comments

Dr. David Alan Gilbert Feb. 15, 2021, 6:35 p.m. UTC | #1
* Daniel P. Berrangé (berrange@redhat.com) wrote:
> This is an incremental step in converting vmstate loading code to report
> via Error objects instead of printing directly to the console/monitor.
> 
> Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> ---
>  migration/colo.c   |  3 +-
>  migration/savevm.c | 73 +++++++++++++++++++++++++++++++---------------
>  migration/savevm.h |  3 +-
>  3 files changed, 52 insertions(+), 27 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index e344b7cf32..4a050ac579 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -705,11 +705,10 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>  
>      qemu_mutex_lock_iothread();
>      cpu_synchronize_all_states();
> -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, errp);
>      qemu_mutex_unlock_iothread();
>  
>      if (ret < 0) {
> -        error_setg(errp, "Load VM's live state (ram) error");
>          return;
>      }
>  
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dd41292d4e..e47aec435c 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1819,6 +1819,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>      QEMUFile *f = mis->from_src_file;
>      int load_res;
>      MigrationState *migr = migrate_get_current();
> +    Error *local_err = NULL;
>  
>      object_ref(OBJECT(migr));
>  
> @@ -1833,7 +1834,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>       * in qemu_file, and thus we must be blocking now.
>       */
>      qemu_file_set_blocking(f, true);
> -    load_res = qemu_loadvm_state_main(f, mis);
> +    load_res = qemu_loadvm_state_main(f, mis, &local_err);
>  
>      /*
>       * This is tricky, but, mis->from_src_file can change after it
> @@ -1849,6 +1850,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>      if (load_res < 0) {
>          qemu_file_set_error(f, load_res);
>          dirty_bitmap_mig_cancel_incoming();
> +        error_report_err(local_err);
>          if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
>              !migrate_postcopy_ram() && migrate_dirty_bitmaps())
>          {
> @@ -1859,12 +1861,10 @@ static void *postcopy_ram_listen_thread(void *opaque)
>                           __func__, load_res);
>              load_res = 0; /* prevent further exit() */
>          } else {
> -            error_report("%s: loadvm failed: %d", __func__, load_res);
>              migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
>                                             MIGRATION_STATUS_FAILED);
>          }
> -    }
> -    if (load_res >= 0) {
> +    } else {
>          /*
>           * This looks good, but it's possible that the device loading in the
>           * main thread hasn't finished yet, and so we might not be in 'RUN'
> @@ -2116,14 +2116,17 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
>   * @mis: Incoming state
>   * @length: Length of packaged data to read
>   *
> - * Returns: Negative values on error
> - *
> + * Returns:
> + *   0: success
> + *   LOADVM_QUIT: success, but stop
> + *   -1: error
>   */
>  static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
>  {
>      int ret;
>      size_t length;
>      QIOChannelBuffer *bioc;
> +    Error *local_err = NULL;
>  
>      length = qemu_get_be32(mis->from_src_file);
>      trace_loadvm_handle_cmd_packaged(length);
> @@ -2149,8 +2152,11 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
>  
>      QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
>  
> -    ret = qemu_loadvm_state_main(packf, mis);
> +    ret = qemu_loadvm_state_main(packf, mis, &local_err);
>      trace_loadvm_handle_cmd_packaged_main(ret);
> +    if (ret < 0) {
> +        error_report_err(local_err);
> +    }
>      qemu_fclose(packf);
>      object_unref(OBJECT(bioc));
>  
> @@ -2568,7 +2574,14 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
>      return true;
>  }
>  
> -int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
> +/*
> + * Returns:
> + *   0: success
> + *   LOADVM_QUIT: success, but stop
> + *   -1: error
> + */
> +int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
> +                           Error **errp)
>  {
>      uint8_t section_type;
>      int ret = 0;
> @@ -2579,7 +2592,9 @@ retry:
>  
>          if (qemu_file_get_error(f)) {
>              ret = qemu_file_get_error(f);
> -            break;
> +            error_setg(errp,
> +                       "Failed to load device state section ID: %d", ret);

Can I ask why these don't use strerror(ret)?

The test I'm running is: start a source VM with an actual guest and a useful
amount of RAM, then a second, identical VM waiting with -incoming:

./x86_64-softmmu/qemu-system-x86_64 -M pc,accel=kvm -nographic -m 8G -drive if=virtio,file=/home/vmimages/fedora-33-nest.qcow

./x86_64-softmmu/qemu-system-x86_64 -M pc,accel=kvm -nographic -m 8G -drive if=virtio,file=/home/vmimages/fedora-33-nest.qcow -incoming tcp:0:4444

Then, in the source VM's monitor:
  migrate_set_speed 1m
  migrate -d tcp:0:4444
  <Now quickly>
  migrate_cancel

Before this patch I get:
qemu-system-x86_64: load of migration failed: Input/output error

With this patch applied I get:
qemu-system-x86_64: Failed to load device state section ID: -5

(5 being EIO)

Dave


> +            goto out;
>          }
>  
>          trace_qemu_loadvm_state_section(section_type);
> @@ -2588,6 +2603,9 @@ retry:
>          case QEMU_VM_SECTION_FULL:
>              ret = qemu_loadvm_section_start_full(f, mis);
>              if (ret < 0) {
> +                error_setg(errp,
> +                           "Failed to load device state section start: %d",
> +                           ret);
>                  goto out;
>              }
>              break;
> @@ -2595,29 +2613,38 @@ retry:
>          case QEMU_VM_SECTION_END:
>              ret = qemu_loadvm_section_part_end(f, mis);
>              if (ret < 0) {
> +                error_setg(errp,
> +                           "Failed to load device state section end: %d", ret);
>                  goto out;
>              }
>              break;
>          case QEMU_VM_COMMAND:
>              ret = loadvm_process_command(f);
>              trace_qemu_loadvm_state_section_command(ret);
> -            if ((ret < 0) || (ret == LOADVM_QUIT)) {
> +            if (ret < 0) {
> +                error_setg(errp,
> +                           "Failed to load device state command: %d", ret);
> +                goto out;
> +            }
> +            if (ret == LOADVM_QUIT) {
>                  goto out;
>              }
>              break;
>          case QEMU_VM_EOF:
>              /* This is the end of migration */
> +            ret = 0;
>              goto out;
>          default:
> -            error_report("Unknown savevm section type %d", section_type);
> -            ret = -EINVAL;
> +            error_setg(errp,
> +                       "Unknown savevm section type %d", section_type);
> +            ret = -1;
>              goto out;
>          }
>      }
>  
>  out:
>      if (ret < 0) {
> -        qemu_file_set_error(f, ret);
> +        qemu_file_set_error(f, -EINVAL);
>  
>          /* Cancel bitmaps incoming regardless of recovery */
>          dirty_bitmap_mig_cancel_incoming();
> @@ -2643,6 +2670,12 @@ out:
>      return ret;
>  }
>  
> +/*
> + * Returns:
> + *   0: success
> + *   LOADVM_QUIT: success, but stop
> + *   -1: error
> + */
>  int qemu_loadvm_state(QEMUFile *f, Error **errp)
>  {
>      MigrationIncomingState *mis = migration_incoming_get_current();
> @@ -2662,17 +2695,12 @@ int qemu_loadvm_state(QEMUFile *f, Error **errp)
>  
>      cpu_synchronize_all_pre_loadvm();
>  
> -    ret = qemu_loadvm_state_main(f, mis);
> -    if (ret < 0) {
> -        error_setg(errp, "Error %d while loading VM state", ret);
> -        ret = -1;
> -    }
> +    ret = qemu_loadvm_state_main(f, mis, errp);
>      qemu_event_set(&mis->main_thread_load_event);
>  
>      trace_qemu_loadvm_state_post_main(ret);
>  
>      if (mis->have_listen_thread) {
> -        error_setg(errp, "Error %d while loading VM state", ret);
>          /* Listen thread still going, can't clean up yet */
>          return ret;
>      }
> @@ -2729,13 +2757,10 @@ int qemu_loadvm_state(QEMUFile *f, Error **errp)
>  int qemu_load_device_state(QEMUFile *f, Error **errp)
>  {
>      MigrationIncomingState *mis = migration_incoming_get_current();
> -    int ret;
>  
>      /* Load QEMU_VM_SECTION_FULL section */
> -    ret = qemu_loadvm_state_main(f, mis);
> -    if (ret < 0) {
> -        error_setg(errp, "Failed to load device state: %d", ret);
> -        return ret;
> +    if (qemu_loadvm_state_main(f, mis, errp) < 0) {
> +        return -1;
>      }
>  
>      cpu_synchronize_all_post_init();
> diff --git a/migration/savevm.h b/migration/savevm.h
> index c727bc103e..1cec83c729 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -62,7 +62,8 @@ int qemu_save_device_state(QEMUFile *f);
>  
>  int qemu_loadvm_state(QEMUFile *f, Error **errp);
>  void qemu_loadvm_state_cleanup(void);
> -int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
> +int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
> +                           Error **errp);
>  int qemu_load_device_state(QEMUFile *f, Error **errp);
>  
>  #endif
> -- 
> 2.29.2
>
Daniel P. Berrangé Feb. 15, 2021, 6:58 p.m. UTC | #2
On Mon, Feb 15, 2021 at 06:35:15PM +0000, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > This is an incremental step in converting vmstate loading code to report
> > via Error objects instead of printing directly to the console/monitor.
> > 
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > ---
> >  migration/colo.c   |  3 +-
> >  migration/savevm.c | 73 +++++++++++++++++++++++++++++++---------------
> >  migration/savevm.h |  3 +-
> >  3 files changed, 52 insertions(+), 27 deletions(-)
> > 
> > diff --git a/migration/colo.c b/migration/colo.c
> > index e344b7cf32..4a050ac579 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -705,11 +705,10 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >  
> >      qemu_mutex_lock_iothread();
> >      cpu_synchronize_all_states();
> > -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> > +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, errp);
> >      qemu_mutex_unlock_iothread();
> >  
> >      if (ret < 0) {
> > -        error_setg(errp, "Load VM's live state (ram) error");
> >          return;
> >      }
> >  
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index dd41292d4e..e47aec435c 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -1819,6 +1819,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >      QEMUFile *f = mis->from_src_file;
> >      int load_res;
> >      MigrationState *migr = migrate_get_current();
> > +    Error *local_err = NULL;
> >  
> >      object_ref(OBJECT(migr));
> >  
> > @@ -1833,7 +1834,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >       * in qemu_file, and thus we must be blocking now.
> >       */
> >      qemu_file_set_blocking(f, true);
> > -    load_res = qemu_loadvm_state_main(f, mis);
> > +    load_res = qemu_loadvm_state_main(f, mis, &local_err);
> >  
> >      /*
> >       * This is tricky, but, mis->from_src_file can change after it
> > @@ -1849,6 +1850,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >      if (load_res < 0) {
> >          qemu_file_set_error(f, load_res);
> >          dirty_bitmap_mig_cancel_incoming();
> > +        error_report_err(local_err);
> >          if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
> >              !migrate_postcopy_ram() && migrate_dirty_bitmaps())
> >          {
> > @@ -1859,12 +1861,10 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >                           __func__, load_res);
> >              load_res = 0; /* prevent further exit() */
> >          } else {
> > -            error_report("%s: loadvm failed: %d", __func__, load_res);
> >              migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
> >                                             MIGRATION_STATUS_FAILED);
> >          }
> > -    }
> > -    if (load_res >= 0) {
> > +    } else {
> >          /*
> >           * This looks good, but it's possible that the device loading in the
> >           * main thread hasn't finished yet, and so we might not be in 'RUN'
> > @@ -2116,14 +2116,17 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> >   * @mis: Incoming state
> >   * @length: Length of packaged data to read
> >   *
> > - * Returns: Negative values on error
> > - *
> > + * Returns:
> > + *   0: success
> > + *   LOADVM_QUIT: success, but stop
> > + *   -1: error
> >   */
> >  static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
> >  {
> >      int ret;
> >      size_t length;
> >      QIOChannelBuffer *bioc;
> > +    Error *local_err = NULL;
> >  
> >      length = qemu_get_be32(mis->from_src_file);
> >      trace_loadvm_handle_cmd_packaged(length);
> > @@ -2149,8 +2152,11 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
> >  
> >      QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
> >  
> > -    ret = qemu_loadvm_state_main(packf, mis);
> > +    ret = qemu_loadvm_state_main(packf, mis, &local_err);
> >      trace_loadvm_handle_cmd_packaged_main(ret);
> > +    if (ret < 0) {
> > +        error_report_err(local_err);
> > +    }
> >      qemu_fclose(packf);
> >      object_unref(OBJECT(bioc));
> >  
> > @@ -2568,7 +2574,14 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
> >      return true;
> >  }
> >  
> > -int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
> > +/*
> > + * Returns:
> > + *   0: success
> > + *   LOADVM_QUIT: success, but stop
> > + *   -1: error
> > + */
> > +int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
> > +                           Error **errp)
> >  {
> >      uint8_t section_type;
> >      int ret = 0;
> > @@ -2579,7 +2592,9 @@ retry:
> >  
> >          if (qemu_file_get_error(f)) {
> >              ret = qemu_file_get_error(f);
> > -            break;
> > +            error_setg(errp,
> > +                       "Failed to load device state section ID: %d", ret);
> 
> Can I ask why these don't use strerror(ret) ?

No good reason.

> 
> The test I'm running is, start a VM with an actual guest and a useful
> amount of ram:
> 
> ./x86_64-softmmu/qemu-system-x86_64 -M pc,accel=kvm -nographic -m 8G -drive if=virtio,file=/home/vmimages/fedora-33-nest.qcow
> 
> ./x86_64-softmmu/qemu-system-x86_64 -M pc,accel=kvm -nographic -m 8G -drive if=virtio,file=/home/vmimages/fedora-33-nest.qcow -incoming tcp:0:4444
> 
> source:
>   migrate_set_speed 1m
>   migrate -d tcp:0:4444
>   <Now quickly>
>   migrate_cancel
> 
> In the old world I get:
> qemu-system-x86_64: load of migration failed: Input/output error
> 
> In your world I get:
> qemu-system-x86_64: Failed to load device state section ID: -5
> 
> (5 being EIO)

Yep, looks like I should fix that.


Regards,
Daniel
Daniel P. Berrangé March 11, 2021, 12:17 p.m. UTC | #3
On Mon, Feb 15, 2021 at 06:35:15PM +0000, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > This is an incremental step in converting vmstate loading code to report
> > via Error objects instead of printing directly to the console/monitor.
> > 
> > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
> > ---
> >  migration/colo.c   |  3 +-
> >  migration/savevm.c | 73 +++++++++++++++++++++++++++++++---------------
> >  migration/savevm.h |  3 +-
> >  3 files changed, 52 insertions(+), 27 deletions(-)
> > 
> > diff --git a/migration/colo.c b/migration/colo.c
> > index e344b7cf32..4a050ac579 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -705,11 +705,10 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >  
> >      qemu_mutex_lock_iothread();
> >      cpu_synchronize_all_states();
> > -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> > +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, errp);
> >      qemu_mutex_unlock_iothread();
> >  
> >      if (ret < 0) {
> > -        error_setg(errp, "Load VM's live state (ram) error");
> >          return;
> >      }
> >  
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index dd41292d4e..e47aec435c 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -1819,6 +1819,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >      QEMUFile *f = mis->from_src_file;
> >      int load_res;
> >      MigrationState *migr = migrate_get_current();
> > +    Error *local_err = NULL;
> >  
> >      object_ref(OBJECT(migr));
> >  
> > @@ -1833,7 +1834,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >       * in qemu_file, and thus we must be blocking now.
> >       */
> >      qemu_file_set_blocking(f, true);
> > -    load_res = qemu_loadvm_state_main(f, mis);
> > +    load_res = qemu_loadvm_state_main(f, mis, &local_err);
> >  
> >      /*
> >       * This is tricky, but, mis->from_src_file can change after it
> > @@ -1849,6 +1850,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >      if (load_res < 0) {
> >          qemu_file_set_error(f, load_res);
> >          dirty_bitmap_mig_cancel_incoming();
> > +        error_report_err(local_err);
> >          if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
> >              !migrate_postcopy_ram() && migrate_dirty_bitmaps())
> >          {
> > @@ -1859,12 +1861,10 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >                           __func__, load_res);
> >              load_res = 0; /* prevent further exit() */
> >          } else {
> > -            error_report("%s: loadvm failed: %d", __func__, load_res);
> >              migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
> >                                             MIGRATION_STATUS_FAILED);
> >          }
> > -    }
> > -    if (load_res >= 0) {
> > +    } else {
> >          /*
> >           * This looks good, but it's possible that the device loading in the
> >           * main thread hasn't finished yet, and so we might not be in 'RUN'
> > @@ -2116,14 +2116,17 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
> >   * @mis: Incoming state
> >   * @length: Length of packaged data to read
> >   *
> > - * Returns: Negative values on error
> > - *
> > + * Returns:
> > + *   0: success
> > + *   LOADVM_QUIT: success, but stop
> > + *   -1: error
> >   */
> >  static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
> >  {
> >      int ret;
> >      size_t length;
> >      QIOChannelBuffer *bioc;
> > +    Error *local_err = NULL;
> >  
> >      length = qemu_get_be32(mis->from_src_file);
> >      trace_loadvm_handle_cmd_packaged(length);
> > @@ -2149,8 +2152,11 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
> >  
> >      QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
> >  
> > -    ret = qemu_loadvm_state_main(packf, mis);
> > +    ret = qemu_loadvm_state_main(packf, mis, &local_err);
> >      trace_loadvm_handle_cmd_packaged_main(ret);
> > +    if (ret < 0) {
> > +        error_report_err(local_err);
> > +    }
> >      qemu_fclose(packf);
> >      object_unref(OBJECT(bioc));
> >  
> > @@ -2568,7 +2574,14 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
> >      return true;
> >  }
> >  
> > -int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
> > +/*
> > + * Returns:
> > + *   0: success
> > + *   LOADVM_QUIT: success, but stop
> > + *   -1: error
> > + */
> > +int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
> > +                           Error **errp)
> >  {
> >      uint8_t section_type;
> >      int ret = 0;
> > @@ -2579,7 +2592,9 @@ retry:
> >  
> >          if (qemu_file_get_error(f)) {
> >              ret = qemu_file_get_error(f);
> > -            break;
> > +            error_setg(errp,
> > +                       "Failed to load device state section ID: %d", ret);
> 
> Can I ask why these don't use strerror(ret) ?
> 
> The test I'm running is, start a VM with an actual guest and a useful
> amount of ram:
> 
> ./x86_64-softmmu/qemu-system-x86_64 -M pc,accel=kvm -nographic -m 8G -drive if=virtio,file=/home/vmimages/fedora-33-nest.qcow
> 
> ./x86_64-softmmu/qemu-system-x86_64 -M pc,accel=kvm -nographic -m 8G -drive if=virtio,file=/home/vmimages/fedora-33-nest.qcow -incoming tcp:0:4444
> 
> source:
>   migrate_set_speed 1m
>   migrate -d tcp:0:4444
>   <Now quickly>
>   migrate_cancel
> 
> In the old world I get:
> qemu-system-x86_64: load of migration failed: Input/output error
> 
> In your world I get:
> qemu-system-x86_64: Failed to load device state section ID: -5
> 
> (5 being EIO)

Ok, so it looks like I do indeed need to pay more attention to
correctly using error_setg_errno() instead of error_setg(), as
Philippe suggested in the earlier patches.


Regards,
Daniel
diff mbox series

Patch

diff --git a/migration/colo.c b/migration/colo.c
index e344b7cf32..4a050ac579 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -705,11 +705,10 @@  static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 
     qemu_mutex_lock_iothread();
     cpu_synchronize_all_states();
-    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+    ret = qemu_loadvm_state_main(mis->from_src_file, mis, errp);
     qemu_mutex_unlock_iothread();
 
     if (ret < 0) {
-        error_setg(errp, "Load VM's live state (ram) error");
         return;
     }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index dd41292d4e..e47aec435c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1819,6 +1819,7 @@  static void *postcopy_ram_listen_thread(void *opaque)
     QEMUFile *f = mis->from_src_file;
     int load_res;
     MigrationState *migr = migrate_get_current();
+    Error *local_err = NULL;
 
     object_ref(OBJECT(migr));
 
@@ -1833,7 +1834,7 @@  static void *postcopy_ram_listen_thread(void *opaque)
      * in qemu_file, and thus we must be blocking now.
      */
     qemu_file_set_blocking(f, true);
-    load_res = qemu_loadvm_state_main(f, mis);
+    load_res = qemu_loadvm_state_main(f, mis, &local_err);
 
     /*
      * This is tricky, but, mis->from_src_file can change after it
@@ -1849,6 +1850,7 @@  static void *postcopy_ram_listen_thread(void *opaque)
     if (load_res < 0) {
         qemu_file_set_error(f, load_res);
         dirty_bitmap_mig_cancel_incoming();
+        error_report_err(local_err);
         if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING &&
             !migrate_postcopy_ram() && migrate_dirty_bitmaps())
         {
@@ -1859,12 +1861,10 @@  static void *postcopy_ram_listen_thread(void *opaque)
                          __func__, load_res);
             load_res = 0; /* prevent further exit() */
         } else {
-            error_report("%s: loadvm failed: %d", __func__, load_res);
             migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                                            MIGRATION_STATUS_FAILED);
         }
-    }
-    if (load_res >= 0) {
+    } else {
         /*
          * This looks good, but it's possible that the device loading in the
          * main thread hasn't finished yet, and so we might not be in 'RUN'
@@ -2116,14 +2116,17 @@  static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
  * @mis: Incoming state
  * @length: Length of packaged data to read
  *
- * Returns: Negative values on error
- *
+ * Returns:
+ *   0: success
+ *   LOADVM_QUIT: success, but stop
+ *   -1: error
  */
 static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
 {
     int ret;
     size_t length;
     QIOChannelBuffer *bioc;
+    Error *local_err = NULL;
 
     length = qemu_get_be32(mis->from_src_file);
     trace_loadvm_handle_cmd_packaged(length);
@@ -2149,8 +2152,11 @@  static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
 
     QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
 
-    ret = qemu_loadvm_state_main(packf, mis);
+    ret = qemu_loadvm_state_main(packf, mis, &local_err);
     trace_loadvm_handle_cmd_packaged_main(ret);
+    if (ret < 0) {
+        error_report_err(local_err);
+    }
     qemu_fclose(packf);
     object_unref(OBJECT(bioc));
 
@@ -2568,7 +2574,14 @@  static bool postcopy_pause_incoming(MigrationIncomingState *mis)
     return true;
 }
 
-int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
+/*
+ * Returns:
+ *   0: success
+ *   LOADVM_QUIT: success, but stop
+ *   -1: error
+ */
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
+                           Error **errp)
 {
     uint8_t section_type;
     int ret = 0;
@@ -2579,7 +2592,9 @@  retry:
 
         if (qemu_file_get_error(f)) {
             ret = qemu_file_get_error(f);
-            break;
+            error_setg(errp,
+                       "Failed to load device state section ID: %d", ret);
+            goto out;
         }
 
         trace_qemu_loadvm_state_section(section_type);
@@ -2588,6 +2603,9 @@  retry:
         case QEMU_VM_SECTION_FULL:
             ret = qemu_loadvm_section_start_full(f, mis);
             if (ret < 0) {
+                error_setg(errp,
+                           "Failed to load device state section start: %d",
+                           ret);
                 goto out;
             }
             break;
@@ -2595,29 +2613,38 @@  retry:
         case QEMU_VM_SECTION_END:
             ret = qemu_loadvm_section_part_end(f, mis);
             if (ret < 0) {
+                error_setg(errp,
+                           "Failed to load device state section end: %d", ret);
                 goto out;
             }
             break;
         case QEMU_VM_COMMAND:
             ret = loadvm_process_command(f);
             trace_qemu_loadvm_state_section_command(ret);
-            if ((ret < 0) || (ret == LOADVM_QUIT)) {
+            if (ret < 0) {
+                error_setg(errp,
+                           "Failed to load device state command: %d", ret);
+                goto out;
+            }
+            if (ret == LOADVM_QUIT) {
                 goto out;
             }
             break;
         case QEMU_VM_EOF:
             /* This is the end of migration */
+            ret = 0;
             goto out;
         default:
-            error_report("Unknown savevm section type %d", section_type);
-            ret = -EINVAL;
+            error_setg(errp,
+                       "Unknown savevm section type %d", section_type);
+            ret = -1;
             goto out;
         }
     }
 
 out:
     if (ret < 0) {
-        qemu_file_set_error(f, ret);
+        qemu_file_set_error(f, -EINVAL);
 
         /* Cancel bitmaps incoming regardless of recovery */
         dirty_bitmap_mig_cancel_incoming();
@@ -2643,6 +2670,12 @@  out:
     return ret;
 }
 
+/*
+ * Returns:
+ *   0: success
+ *   LOADVM_QUIT: success, but stop
+ *   -1: error
+ */
 int qemu_loadvm_state(QEMUFile *f, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
@@ -2662,17 +2695,12 @@  int qemu_loadvm_state(QEMUFile *f, Error **errp)
 
     cpu_synchronize_all_pre_loadvm();
 
-    ret = qemu_loadvm_state_main(f, mis);
-    if (ret < 0) {
-        error_setg(errp, "Error %d while loading VM state", ret);
-        ret = -1;
-    }
+    ret = qemu_loadvm_state_main(f, mis, errp);
     qemu_event_set(&mis->main_thread_load_event);
 
     trace_qemu_loadvm_state_post_main(ret);
 
     if (mis->have_listen_thread) {
-        error_setg(errp, "Error %d while loading VM state", ret);
         /* Listen thread still going, can't clean up yet */
         return ret;
     }
@@ -2729,13 +2757,10 @@  int qemu_loadvm_state(QEMUFile *f, Error **errp)
 int qemu_load_device_state(QEMUFile *f, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
-    int ret;
 
     /* Load QEMU_VM_SECTION_FULL section */
-    ret = qemu_loadvm_state_main(f, mis);
-    if (ret < 0) {
-        error_setg(errp, "Failed to load device state: %d", ret);
-        return ret;
+    if (qemu_loadvm_state_main(f, mis, errp) < 0) {
+        return -1;
     }
 
     cpu_synchronize_all_post_init();
diff --git a/migration/savevm.h b/migration/savevm.h
index c727bc103e..1cec83c729 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -62,7 +62,8 @@  int qemu_save_device_state(QEMUFile *f);
 
 int qemu_loadvm_state(QEMUFile *f, Error **errp);
 void qemu_loadvm_state_cleanup(void);
-int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
+                           Error **errp);
 int qemu_load_device_state(QEMUFile *f, Error **errp);
 
 #endif