diff mbox series

[v26,09/17] vfio: Add load state functions to SaveVMHandlers

Message ID 1600817059-26721-10-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Add migration support for VFIO devices | expand

Commit Message

Kirti Wankhede Sept. 22, 2020, 11:24 p.m. UTC
Sequence  during _RESUMING device state:
While data for this device is available, repeat below steps:
a. read data_offset from where user application should write data.
b. write data of data_size to migration region from data_offset.
c. write data_size which indicates vendor driver that data is written in
   staging buffer.

For user, data is opaque. User should write data in the same order as
received.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 hw/vfio/migration.c  | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/trace-events |   3 +
 2 files changed, 173 insertions(+)

Comments

Cornelia Huck Oct. 1, 2020, 10:07 a.m. UTC | #1
On Wed, 23 Sep 2020 04:54:11 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Sequence  during _RESUMING device state:
> While data for this device is available, repeat below steps:
> a. read data_offset from where user application should write data.
> b. write data of data_size to migration region from data_offset.
> c. write data_size which indicates vendor driver that data is written in
>    staging buffer.
> 
> For user, data is opaque. User should write data in the same order as
> received.
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  hw/vfio/migration.c  | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/trace-events |   3 +
>  2 files changed, 173 insertions(+)
> 
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 4611bb972228..ffd70282dd0e 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -328,6 +328,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
>      return qemu_file_get_error(f);
>  }
>  
> +static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    uint64_t data;
> +
> +    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
> +        int ret;
> +
> +        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
> +        if (ret) {
> +            error_report("%s: Failed to load device config space",
> +                         vbasedev->name);
> +            return ret;
> +        }
> +    }
> +
> +    data = qemu_get_be64(f);
> +    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
> +        error_report("%s: Failed loading device config space, "
> +                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);

I'm confused here: If we don't have a vfio_load_config callback, or if
that callback did not read everything, we also might end up with a
value that's not END_OF_STATE... in that case, the problem is not with
the stream, but rather with the consumer?

> +        return -EINVAL;
> +    }
> +
> +    trace_vfio_load_device_config_state(vbasedev->name);
> +    return qemu_file_get_error(f);
> +}
> +
>  /* ---------------------------------------------------------------------- */
>  
>  static int vfio_save_setup(QEMUFile *f, void *opaque)
> @@ -502,12 +529,155 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>      return ret;
>  }
>  
> +static int vfio_load_setup(QEMUFile *f, void *opaque)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    int ret = 0;
> +
> +    if (migration->region.mmaps) {
> +        ret = vfio_region_mmap(&migration->region);
> +        if (ret) {
> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
> +                         vbasedev->name, migration->region.nr,
> +                         strerror(-ret));
> +            error_report("%s: Falling back to slow path", vbasedev->name);
> +        }
> +    }
> +
> +    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
> +                                   VFIO_DEVICE_STATE_RESUMING);
> +    if (ret) {
> +        error_report("%s: Failed to set state RESUMING", vbasedev->name);
> +    }
> +    return ret;

If I follow the code correctly, the cleanup callback will not be
invoked if you return != 0 here... should you clean up possible
mappings on error here?

> +}
> +
> +static int vfio_load_cleanup(void *opaque)
> +{
> +    vfio_save_cleanup(opaque);
> +    return 0;
> +}
> +
> +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
> +{
> +    VFIODevice *vbasedev = opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    int ret = 0;
> +    uint64_t data, data_size;
> +
> +    data = qemu_get_be64(f);
> +    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
> +
> +        trace_vfio_load_state(vbasedev->name, data);
> +
> +        switch (data) {
> +        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
> +        {
> +            ret = vfio_load_device_config_state(f, opaque);
> +            if (ret) {
> +                return ret;
> +            }
> +            break;
> +        }
> +        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
> +        {
> +            data = qemu_get_be64(f);
> +            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
> +                return ret;
> +            } else {
> +                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
> +                             vbasedev->name, data);
> +                return -EINVAL;
> +            }
> +            break;
> +        }
> +        case VFIO_MIG_FLAG_DEV_DATA_STATE:
> +        {
> +            VFIORegion *region = &migration->region;
> +            uint64_t data_offset = 0, size;

I think this function would benefit from splitting this off into a
function handling DEV_DATA_STATE. It is quite hard to follow through
all the checks and find out when we continue, and when we break off.

Some documentation about the markers would also be really helpful.
The logic seems to be:
- DEV_CONFIG_STATE has config data and must be ended by END_OF_STATE
- DEV_SETUP_STATE has only END_OF_STATE, no data
- DEV_DATA_STATE has... data; if there's any END_OF_STATE, it's buried
  far down in the called functions


> +
> +            data_size = size = qemu_get_be64(f);
> +            if (data_size == 0) {
> +                break;
> +            }
> +
> +            ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
> +                                region->fd_offset +
> +                                offsetof(struct vfio_device_migration_info,
> +                                         data_offset));
> +            if (ret < 0) {
> +                return ret;
> +            }
> +
> +            trace_vfio_load_state_device_data(vbasedev->name, data_offset,
> +                                              data_size);
> +
> +            while (size) {
> +                void *buf = NULL;
> +                uint64_t sec_size;
> +                bool buf_alloc = false;
> +
> +                buf = get_data_section_size(region, data_offset, size,
> +                                            &sec_size);
> +
> +                if (!buf) {
> +                    buf = g_try_malloc(sec_size);
> +                    if (!buf) {
> +                        error_report("%s: Error allocating buffer ", __func__);
> +                        return -ENOMEM;
> +                    }
> +                    buf_alloc = true;
> +                }
> +
> +                qemu_get_buffer(f, buf, sec_size);
> +
> +                if (buf_alloc) {
> +                    ret = vfio_mig_write(vbasedev, buf, sec_size,
> +                                         region->fd_offset + data_offset);
> +                    g_free(buf);
> +
> +                    if (ret < 0) {
> +                        return ret;
> +                    }
> +                }
> +                size -= sec_size;
> +                data_offset += sec_size;
> +            }
> +
> +            ret = vfio_mig_write(vbasedev, &data_size, sizeof(data_size),
> +                                 region->fd_offset +
> +                       offsetof(struct vfio_device_migration_info, data_size));
> +            if (ret < 0) {
> +                return ret;
> +            }
> +            break;
> +        }
> +
> +        default:
> +            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
> +            return -EINVAL;
> +        }
> +
> +        data = qemu_get_be64(f);
> +        ret = qemu_file_get_error(f);
> +        if (ret) {
> +            return ret;
> +        }
> +    }
> +
> +    return ret;
> +}
> +
>  static SaveVMHandlers savevm_vfio_handlers = {
>      .save_setup = vfio_save_setup,
>      .save_cleanup = vfio_save_cleanup,
>      .save_live_pending = vfio_save_pending,
>      .save_live_iterate = vfio_save_iterate,
>      .save_live_complete_precopy = vfio_save_complete_precopy,
> +    .load_setup = vfio_load_setup,
> +    .load_cleanup = vfio_load_cleanup,

Unrelated to this patch: It's a bit odd that load_cleanup() (unlike
save_cleanup()) has a return code (that seems unused).

> +    .load_state = vfio_load_state,
>  };
>  
>  /* ---------------------------------------------------------------------- */
Kirti Wankhede Oct. 18, 2020, 8:47 p.m. UTC | #2
On 10/1/2020 3:37 PM, Cornelia Huck wrote:
> On Wed, 23 Sep 2020 04:54:11 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> Sequence  during _RESUMING device state:
>> While data for this device is available, repeat below steps:
>> a. read data_offset from where user application should write data.
>> b. write data of data_size to migration region from data_offset.
>> c. write data_size which indicates vendor driver that data is written in
>>     staging buffer.
>>
>> For user, data is opaque. User should write data in the same order as
>> received.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>   hw/vfio/migration.c  | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/trace-events |   3 +
>>   2 files changed, 173 insertions(+)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 4611bb972228..ffd70282dd0e 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -328,6 +328,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
>>       return qemu_file_get_error(f);
>>   }
>>   
>> +static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    uint64_t data;
>> +
>> +    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
>> +        int ret;
>> +
>> +        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
>> +        if (ret) {
>> +            error_report("%s: Failed to load device config space",
>> +                         vbasedev->name);
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    data = qemu_get_be64(f);
>> +    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
>> +        error_report("%s: Failed loading device config space, "
>> +                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
> 
> I'm confused here: If we don't have a vfio_load_config callback, or if
> that callback did not read everything, we also might end up with a
> value that's not END_OF_STATE... in that case, the problem is not with
> the stream, but rather with the consumer?

Right, hence "end flag incorrect" is reported.

> 
>> +        return -EINVAL;
>> +    }
>> +
>> +    trace_vfio_load_device_config_state(vbasedev->name);
>> +    return qemu_file_get_error(f);
>> +}
>> +
>>   /* ---------------------------------------------------------------------- */
>>   
>>   static int vfio_save_setup(QEMUFile *f, void *opaque)
>> @@ -502,12 +529,155 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>>       return ret;
>>   }
>>   
>> +static int vfio_load_setup(QEMUFile *f, void *opaque)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    int ret = 0;
>> +
>> +    if (migration->region.mmaps) {
>> +        ret = vfio_region_mmap(&migration->region);
>> +        if (ret) {
>> +            error_report("%s: Failed to mmap VFIO migration region %d: %s",
>> +                         vbasedev->name, migration->region.nr,
>> +                         strerror(-ret));
>> +            error_report("%s: Falling back to slow path", vbasedev->name);
>> +        }
>> +    }
>> +
>> +    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
>> +                                   VFIO_DEVICE_STATE_RESUMING);
>> +    if (ret) {
>> +        error_report("%s: Failed to set state RESUMING", vbasedev->name);
>> +    }
>> +    return ret;
> 
> If I follow the code correctly, the cleanup callback will not be
> invoked if you return != 0 here... should you clean up possible
> mappings on error here?
> 

Makes sense, adding region ummap on error.

>> +}
>> +
>> +static int vfio_load_cleanup(void *opaque)
>> +{
>> +    vfio_save_cleanup(opaque);
>> +    return 0;
>> +}
>> +
>> +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
>> +{
>> +    VFIODevice *vbasedev = opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    int ret = 0;
>> +    uint64_t data, data_size;
>> +
>> +    data = qemu_get_be64(f);
>> +    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
>> +
>> +        trace_vfio_load_state(vbasedev->name, data);
>> +
>> +        switch (data) {
>> +        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
>> +        {
>> +            ret = vfio_load_device_config_state(f, opaque);
>> +            if (ret) {
>> +                return ret;
>> +            }
>> +            break;
>> +        }
>> +        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
>> +        {
>> +            data = qemu_get_be64(f);
>> +            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
>> +                return ret;
>> +            } else {
>> +                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
>> +                             vbasedev->name, data);
>> +                return -EINVAL;
>> +            }
>> +            break;
>> +        }
>> +        case VFIO_MIG_FLAG_DEV_DATA_STATE:
>> +        {
>> +            VFIORegion *region = &migration->region;
>> +            uint64_t data_offset = 0, size;
> 
> I think this function would benefit from splitting this off into a
> function handling DEV_DATA_STATE. It is quite hard to follow through
> all the checks and find out when we continue, and when we break off.
> 

Each switch case has a break, we break off on success cases, where as we 
return error if we encounter any case where (ret < 0)


> Some documentation about the markers would also be really helpful.

Sure adding it in patch 07, where these are defined.

> The logic seems to be:
> - DEV_CONFIG_STATE has config data and must be ended by END_OF_STATE
Right

> - DEV_SETUP_STATE has only END_OF_STATE, no data
Right now there is no data, but this is provision to add data if 
required in future.

> - DEV_DATA_STATE has... data; if there's any END_OF_STATE, it's buried
>    far down in the called functions
>

This is not correct, END_OF_STATE is after data. Moved data buffer 
loading logic to function vfio_load_buffer(), so DEV_DATA_STATE looks 
simplified as below. Hope this helps.

         case VFIO_MIG_FLAG_DEV_DATA_STATE:
         {
             uint64_t data_size;

             data_size = qemu_get_be64(f);
             if (data_size == 0) {
                 break;
             }

             ret = vfio_load_buffer(f, vbasedev, data_size);
             if (ret < 0) {
                 return ret;
             }
             break;
         }

Also handling the case if data_size is greater than the data section of 
migration region at destination in vfio_load_buffer()in my next version.

Thanks,
Kirti

> 
>> +
>> +            data_size = size = qemu_get_be64(f);
>> +            if (data_size == 0) {
>> +                break;
>> +            }
>> +
>> +            ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
>> +                                region->fd_offset +
>> +                                offsetof(struct vfio_device_migration_info,
>> +                                         data_offset));
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +
>> +            trace_vfio_load_state_device_data(vbasedev->name, data_offset,
>> +                                              data_size);
>> +
>> +            while (size) {
>> +                void *buf = NULL;
>> +                uint64_t sec_size;
>> +                bool buf_alloc = false;
>> +
>> +                buf = get_data_section_size(region, data_offset, size,
>> +                                            &sec_size);
>> +
>> +                if (!buf) {
>> +                    buf = g_try_malloc(sec_size);
>> +                    if (!buf) {
>> +                        error_report("%s: Error allocating buffer ", __func__);
>> +                        return -ENOMEM;
>> +                    }
>> +                    buf_alloc = true;
>> +                }
>> +
>> +                qemu_get_buffer(f, buf, sec_size);
>> +
>> +                if (buf_alloc) {
>> +                    ret = vfio_mig_write(vbasedev, buf, sec_size,
>> +                                         region->fd_offset + data_offset);
>> +                    g_free(buf);
>> +
>> +                    if (ret < 0) {
>> +                        return ret;
>> +                    }
>> +                }
>> +                size -= sec_size;
>> +                data_offset += sec_size;
>> +            }
>> +
>> +            ret = vfio_mig_write(vbasedev, &data_size, sizeof(data_size),
>> +                                 region->fd_offset +
>> +                       offsetof(struct vfio_device_migration_info, data_size));
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +            break;
>> +        }
>> +
>> +        default:
>> +            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
>> +            return -EINVAL;
>> +        }
>> +
>> +        data = qemu_get_be64(f);
>> +        ret = qemu_file_get_error(f);
>> +        if (ret) {
>> +            return ret;
>> +        }
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>>   static SaveVMHandlers savevm_vfio_handlers = {
>>       .save_setup = vfio_save_setup,
>>       .save_cleanup = vfio_save_cleanup,
>>       .save_live_pending = vfio_save_pending,
>>       .save_live_iterate = vfio_save_iterate,
>>       .save_live_complete_precopy = vfio_save_complete_precopy,
>> +    .load_setup = vfio_load_setup,
>> +    .load_cleanup = vfio_load_cleanup,
> 
> Unrelated to this patch: It's a bit odd that load_cleanup() (unlike
> save_cleanup()) has a return code (that seems unused).
> 
>> +    .load_state = vfio_load_state,
>>   };
>>   
>>   /* ---------------------------------------------------------------------- */
>
Cornelia Huck Oct. 20, 2020, 4:25 p.m. UTC | #3
On Mon, 19 Oct 2020 02:17:43 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 10/1/2020 3:37 PM, Cornelia Huck wrote:
> > On Wed, 23 Sep 2020 04:54:11 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> >> Sequence  during _RESUMING device state:
> >> While data for this device is available, repeat below steps:
> >> a. read data_offset from where user application should write data.
> >> b. write data of data_size to migration region from data_offset.
> >> c. write data_size which indicates vendor driver that data is written in
> >>     staging buffer.
> >>
> >> For user, data is opaque. User should write data in the same order as
> >> received.
> >>
> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> >> Reviewed-by: Neo Jia <cjia@nvidia.com>
> >> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >> ---
> >>   hw/vfio/migration.c  | 170 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >>   hw/vfio/trace-events |   3 +
> >>   2 files changed, 173 insertions(+)
> >>
> >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> >> index 4611bb972228..ffd70282dd0e 100644
> >> --- a/hw/vfio/migration.c
> >> +++ b/hw/vfio/migration.c
> >> @@ -328,6 +328,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
> >>       return qemu_file_get_error(f);
> >>   }
> >>   
> >> +static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
> >> +{
> >> +    VFIODevice *vbasedev = opaque;
> >> +    uint64_t data;
> >> +
> >> +    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
> >> +        int ret;
> >> +
> >> +        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
> >> +        if (ret) {
> >> +            error_report("%s: Failed to load device config space",
> >> +                         vbasedev->name);
> >> +            return ret;
> >> +        }
> >> +    }
> >> +
> >> +    data = qemu_get_be64(f);
> >> +    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
> >> +        error_report("%s: Failed loading device config space, "
> >> +                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);  
> > 
> > I'm confused here: If we don't have a vfio_load_config callback, or if
> > that callback did not read everything, we also might end up with a
> > value that's not END_OF_STATE... in that case, the problem is not with
> > the stream, but rather with the consumer?  
> 
> Right, hence "end flag incorrect" is reported.

Yes, but that's what I find confusing... a missing or incorrect
vfio_load_config callback does not have anything to do with incorrect
end flags as present in the stream, but with the consumer not reading
things correctly. If I got this error, I would go looking whether
there's anything wrong with the stream and the code that produced it,
and that's the wrong direction.

(...)

> >> +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
> >> +{
> >> +    VFIODevice *vbasedev = opaque;
> >> +    VFIOMigration *migration = vbasedev->migration;
> >> +    int ret = 0;
> >> +    uint64_t data, data_size;
> >> +
> >> +    data = qemu_get_be64(f);
> >> +    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
> >> +
> >> +        trace_vfio_load_state(vbasedev->name, data);
> >> +
> >> +        switch (data) {
> >> +        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
> >> +        {
> >> +            ret = vfio_load_device_config_state(f, opaque);
> >> +            if (ret) {
> >> +                return ret;
> >> +            }
> >> +            break;
> >> +        }
> >> +        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
> >> +        {
> >> +            data = qemu_get_be64(f);
> >> +            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
> >> +                return ret;
> >> +            } else {
> >> +                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
> >> +                             vbasedev->name, data);
> >> +                return -EINVAL;
> >> +            }
> >> +            break;
> >> +        }
> >> +        case VFIO_MIG_FLAG_DEV_DATA_STATE:
> >> +        {
> >> +            VFIORegion *region = &migration->region;
> >> +            uint64_t data_offset = 0, size;  
> > 
> > I think this function would benefit from splitting this off into a
> > function handling DEV_DATA_STATE. It is quite hard to follow through
> > all the checks and find out when we continue, and when we break off.
> >   
> 
> Each switch case has a break, we break off on success cases, where as we 
> return error if we encounter any case where (ret < 0)

Of course, but I don't find it easy to follow when the errors are
happening.

> 
> 
> > Some documentation about the markers would also be really helpful.  
> 
> Sure adding it in patch 07, where these are defined.
> 
> > The logic seems to be:
> > - DEV_CONFIG_STATE has config data and must be ended by END_OF_STATE  
> Right
> 
> > - DEV_SETUP_STATE has only END_OF_STATE, no data  
> Right now there is no data, but this is provision to add data if 
> required in future.
> 
> > - DEV_DATA_STATE has... data; if there's any END_OF_STATE, it's buried
> >    far down in the called functions
> >  
> 
> This is not correct, END_OF_STATE is after data. Moved data buffer 
> loading logic to function vfio_load_buffer(), so DEV_DATA_STATE looks 
> simplified as below. Hope this helps.
> 
>          case VFIO_MIG_FLAG_DEV_DATA_STATE:
>          {
>              uint64_t data_size;
> 
>              data_size = qemu_get_be64(f);
>              if (data_size == 0) {
>                  break;
>              }
> 
>              ret = vfio_load_buffer(f, vbasedev, data_size);
>              if (ret < 0) {
>                  return ret;
>              }
>              break;
>          }

Hm.

What I find not that easy to follow is the structure here:

while (!end_marker) {
    switch (data) {
    case config_state:
        if (load_config)
            return error;
        break;
    case setup_state:
        read_next_value();
        if (end_marker)
            return 0;
        else
            return error;
        break;
    case data_state:
        size = read_next_value();
        if (!size)
            break;
        if (vfio_load_buffer())
            return error;
        break;
    default:
        return error;
    }
    read_next_value();
    if (qemu_file_get_error())
        return error;
}

So, what I don't understand is:
- Why do we call qemu_file_get_error() only after we went through the
  whole switch? This means it is never called for the
  setup_state/end_marker pair.
- If we look for an end marker for config_state and data_state, it's
  buried in the called functions. How can we be sure they actually do
  look for it? That needs at least a comment.
- If we find a valid setup_state section, we return success
  immediately. If we find valid config_state or data_state sections, we
  keep looking for more sections. Why? This also needs at least a
  comment.

> 
> Also handling the case if data_size is greater than the data section of 
> migration region at destination in vfio_load_buffer()in my next version.
> 
> Thanks,
> Kirti
> 
> >   
> >> +
> >> +            data_size = size = qemu_get_be64(f);
> >> +            if (data_size == 0) {
> >> +                break;
> >> +            }
> >> +
> >> +            ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
> >> +                                region->fd_offset +
> >> +                                offsetof(struct vfio_device_migration_info,
> >> +                                         data_offset));
> >> +            if (ret < 0) {
> >> +                return ret;
> >> +            }
> >> +
> >> +            trace_vfio_load_state_device_data(vbasedev->name, data_offset,
> >> +                                              data_size);
> >> +
> >> +            while (size) {
> >> +                void *buf = NULL;
> >> +                uint64_t sec_size;
> >> +                bool buf_alloc = false;
> >> +
> >> +                buf = get_data_section_size(region, data_offset, size,
> >> +                                            &sec_size);
> >> +
> >> +                if (!buf) {
> >> +                    buf = g_try_malloc(sec_size);
> >> +                    if (!buf) {
> >> +                        error_report("%s: Error allocating buffer ", __func__);
> >> +                        return -ENOMEM;
> >> +                    }
> >> +                    buf_alloc = true;
> >> +                }
> >> +
> >> +                qemu_get_buffer(f, buf, sec_size);
> >> +
> >> +                if (buf_alloc) {
> >> +                    ret = vfio_mig_write(vbasedev, buf, sec_size,
> >> +                                         region->fd_offset + data_offset);
> >> +                    g_free(buf);
> >> +
> >> +                    if (ret < 0) {
> >> +                        return ret;
> >> +                    }
> >> +                }
> >> +                size -= sec_size;
> >> +                data_offset += sec_size;
> >> +            }
> >> +
> >> +            ret = vfio_mig_write(vbasedev, &data_size, sizeof(data_size),
> >> +                                 region->fd_offset +
> >> +                       offsetof(struct vfio_device_migration_info, data_size));
> >> +            if (ret < 0) {
> >> +                return ret;
> >> +            }
> >> +            break;
> >> +        }
> >> +
> >> +        default:
> >> +            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
> >> +            return -EINVAL;
> >> +        }
> >> +
> >> +        data = qemu_get_be64(f);
> >> +        ret = qemu_file_get_error(f);
> >> +        if (ret) {
> >> +            return ret;
> >> +        }
> >> +    }
> >> +
> >> +    return ret;
> >> +}
> >> +
diff mbox series

Patch

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 4611bb972228..ffd70282dd0e 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -328,6 +328,33 @@  static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
     return qemu_file_get_error(f);
 }
 
+static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    uint64_t data;
+
+    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
+        int ret;
+
+        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
+        if (ret) {
+            error_report("%s: Failed to load device config space",
+                         vbasedev->name);
+            return ret;
+        }
+    }
+
+    data = qemu_get_be64(f);
+    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
+        error_report("%s: Failed loading device config space, "
+                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
+        return -EINVAL;
+    }
+
+    trace_vfio_load_device_config_state(vbasedev->name);
+    return qemu_file_get_error(f);
+}
+
 /* ---------------------------------------------------------------------- */
 
 static int vfio_save_setup(QEMUFile *f, void *opaque)
@@ -502,12 +529,155 @@  static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
     return ret;
 }
 
+static int vfio_load_setup(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret = 0;
+
+    if (migration->region.mmaps) {
+        ret = vfio_region_mmap(&migration->region);
+        if (ret) {
+            error_report("%s: Failed to mmap VFIO migration region %d: %s",
+                         vbasedev->name, migration->region.nr,
+                         strerror(-ret));
+            error_report("%s: Falling back to slow path", vbasedev->name);
+        }
+    }
+
+    ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
+                                   VFIO_DEVICE_STATE_RESUMING);
+    if (ret) {
+        error_report("%s: Failed to set state RESUMING", vbasedev->name);
+    }
+    return ret;
+}
+
+static int vfio_load_cleanup(void *opaque)
+{
+    vfio_save_cleanup(opaque);
+    return 0;
+}
+
+static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    int ret = 0;
+    uint64_t data, data_size;
+
+    data = qemu_get_be64(f);
+    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
+
+        trace_vfio_load_state(vbasedev->name, data);
+
+        switch (data) {
+        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
+        {
+            ret = vfio_load_device_config_state(f, opaque);
+            if (ret) {
+                return ret;
+            }
+            break;
+        }
+        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
+        {
+            data = qemu_get_be64(f);
+            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
+                return ret;
+            } else {
+                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
+                             vbasedev->name, data);
+                return -EINVAL;
+            }
+            break;
+        }
+        case VFIO_MIG_FLAG_DEV_DATA_STATE:
+        {
+            VFIORegion *region = &migration->region;
+            uint64_t data_offset = 0, size;
+
+            data_size = size = qemu_get_be64(f);
+            if (data_size == 0) {
+                break;
+            }
+
+            ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
+                                region->fd_offset +
+                                offsetof(struct vfio_device_migration_info,
+                                         data_offset));
+            if (ret < 0) {
+                return ret;
+            }
+
+            trace_vfio_load_state_device_data(vbasedev->name, data_offset,
+                                              data_size);
+
+            while (size) {
+                void *buf = NULL;
+                uint64_t sec_size;
+                bool buf_alloc = false;
+
+                buf = get_data_section_size(region, data_offset, size,
+                                            &sec_size);
+
+                if (!buf) {
+                    buf = g_try_malloc(sec_size);
+                    if (!buf) {
+                        error_report("%s: Error allocating buffer ", __func__);
+                        return -ENOMEM;
+                    }
+                    buf_alloc = true;
+                }
+
+                qemu_get_buffer(f, buf, sec_size);
+
+                if (buf_alloc) {
+                    ret = vfio_mig_write(vbasedev, buf, sec_size,
+                                         region->fd_offset + data_offset);
+                    g_free(buf);
+
+                    if (ret < 0) {
+                        return ret;
+                    }
+                }
+                size -= sec_size;
+                data_offset += sec_size;
+            }
+
+            ret = vfio_mig_write(vbasedev, &data_size, sizeof(data_size),
+                                 region->fd_offset +
+                       offsetof(struct vfio_device_migration_info, data_size));
+            if (ret < 0) {
+                return ret;
+            }
+            break;
+        }
+
+        default:
+            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
+            return -EINVAL;
+        }
+
+        data = qemu_get_be64(f);
+        ret = qemu_file_get_error(f);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return ret;
+}
+
 static SaveVMHandlers savevm_vfio_handlers = {
     .save_setup = vfio_save_setup,
     .save_cleanup = vfio_save_cleanup,
     .save_live_pending = vfio_save_pending,
     .save_live_iterate = vfio_save_iterate,
     .save_live_complete_precopy = vfio_save_complete_precopy,
+    .load_setup = vfio_load_setup,
+    .load_cleanup = vfio_load_cleanup,
+    .load_state = vfio_load_state,
 };
 
 /* ---------------------------------------------------------------------- */
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 118b5547c921..94ba4696f0c6 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -160,3 +160,6 @@  vfio_save_device_config_state(const char *name) " (%s)"
 vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
 vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
 vfio_save_complete_precopy(const char *name) " (%s)"
+vfio_load_device_config_state(const char *name) " (%s)"
+vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
+vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64