[vfio] vfio/mlx5: Enforce PRE_COPY support

Message ID 20240305103037.61144-1-yishaih@nvidia.com
State New, archived
Series [vfio] vfio/mlx5: Enforce PRE_COPY support

Commit Message

Yishai Hadas March 5, 2024, 10:30 a.m. UTC
Enable live migration only once the firmware supports PRE_COPY.

PRE_COPY has been supported by the firmware for a long time already and
is required to achieve low downtime during live migration.

This lets us clean up some old code that is no longer applicable now
that PRE_COPY is fully supported by the firmware.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  83 +++++++++++++++++++-------
 drivers/vfio/pci/mlx5/cmd.h  |   6 --
 drivers/vfio/pci/mlx5/main.c | 109 +++--------------------------------
 3 files changed, 71 insertions(+), 127 deletions(-)
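
For context: the PRE_COPY capability this patch makes mandatory is what
the device reports to userspace via the standard VFIO feature query. A
minimal probe might look like the sketch below (illustrative only; the
helper name and the already-open VFIO device fd are assumptions, while
the ioctl, structs and flag names are the stock uAPI from
<linux/vfio.h>):

#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Query the device's migration flags. Returns 1 if PRE_COPY is
 * advertised, 0 if the device migrates without it, and -1 if migration
 * is unsupported. After this patch an mlx5 VF never hits the middle
 * case: it either reports PRE_COPY or is not migratable at all.
 */
static int query_pre_copy(int device_fd)
{
	__u64 buf[2] = {};	/* 8-byte feature header + 8-byte flags */
	struct vfio_device_feature *hdr = (void *)buf;
	struct vfio_device_feature_migration *mig = (void *)hdr->data;

	hdr->argsz = sizeof(buf);
	hdr->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, hdr))
		return -1;

	return !!(mig->flags & VFIO_MIGRATION_PRE_COPY);
}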

Comments

Alex Williamson March 5, 2024, 9:56 p.m. UTC | #1
On Tue, 5 Mar 2024 12:30:37 +0200
Yishai Hadas <yishaih@nvidia.com> wrote:

> Enable live migration only once the firmware supports PRE_COPY.
> 
> PRE_COPY has been supported by the firmware for a long time already and
> is required to achieve low downtime during live migration.
> 
> This lets us clean up some old code that is no longer applicable now
> that PRE_COPY is fully supported by the firmware.

Was firmware without PRE_COPY support ever available to users?  AIUI
this would disable migration support on devices with older firmware, so
if that firmware exists in the wild this should go through a
deprecation process.

We should also likely note a minimum firmware revision and time frame
when PRE_COPY support was added in firmware.  Thanks,

Alex


Yishai Hadas March 6, 2024, 11:01 a.m. UTC | #2
On 05/03/2024 23:56, Alex Williamson wrote:
> On Tue, 5 Mar 2024 12:30:37 +0200
> Yishai Hadas <yishaih@nvidia.com> wrote:
> 
>> Enable live migration only once the firmware supports PRE_COPY.
>>
>> PRE_COPY has been supported by the firmware for a long time already and
>> is required to achieve low downtime during live migration.
>>
>> This lets us clean up some old code that is no longer applicable now
>> that PRE_COPY is fully supported by the firmware.
> 
> Was firmware without PRE_COPY support ever available to users?  AIUI
> this would disable migration support on devices with older firmware, so
> if that firmware exists in the wild this should go through a
> deprecation process.

No firmware without PRE_COPY support was ever available to users.

> 
> We should also likely note a minimum firmware revision and time frame
> when PRE_COPY support was added in firmware.  Thanks,
> 

Sure, I just sent V1 with that information.

Thanks,
Yishai

Patch

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index c54bcd5d0917..41a4b0cf4297 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
 		goto end;
 
+	if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+	      MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
+		goto end;
+
 	mvdev->vf_id = pci_iov_vf_id(pdev);
 	if (mvdev->vf_id < 0)
 		goto end;
@@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 	mvdev->migrate_cap = 1;
 	mvdev->core_device.vdev.migration_flags =
 		VFIO_MIGRATION_STOP_COPY |
-		VFIO_MIGRATION_P2P;
+		VFIO_MIGRATION_P2P |
+		VFIO_MIGRATION_PRE_COPY;
+
 	mvdev->core_device.vdev.mig_ops = mig_ops;
 	init_completion(&mvdev->tracker_comp);
 	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
 		mvdev->core_device.vdev.log_ops = log_ops;
 
-	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
-	    MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
-		mvdev->core_device.vdev.migration_flags |=
-			VFIO_MIGRATION_PRE_COPY;
-
 	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
 		mvdev->chunk_mode = 1;
 
@@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
 	kfree(buf);
 }
 
+static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+				      unsigned int npages)
+{
+	unsigned int to_alloc = npages;
+	struct page **page_list;
+	unsigned long filled;
+	unsigned int to_fill;
+	int ret;
+
+	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
+	if (!page_list)
+		return -ENOMEM;
+
+	do {
+		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
+						page_list);
+		if (!filled) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		to_alloc -= filled;
+		ret = sg_alloc_append_table_from_pages(
+			&buf->table, page_list, filled, 0,
+			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+			GFP_KERNEL_ACCOUNT);
+
+		if (ret)
+			goto err;
+		buf->allocated_length += filled * PAGE_SIZE;
+		/* clean input for another bulk allocation */
+		memset(page_list, 0, filled * sizeof(*page_list));
+		to_fill = min_t(unsigned int, to_alloc,
+				PAGE_SIZE / sizeof(*page_list));
+	} while (to_alloc > 0);
+
+	kvfree(page_list);
+	return 0;
+
+err:
+	kvfree(page_list);
+	return ret;
+}
+
 struct mlx5_vhca_data_buffer *
 mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
 			 size_t length,
@@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		goto err_out;
 	}
 
-	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-		if (async_data->stop_copy_chunk) {
-			u8 header_idx = buf->stop_copy_chunk_num ?
-				buf->stop_copy_chunk_num - 1 : 0;
+	if (async_data->stop_copy_chunk) {
+		u8 header_idx = buf->stop_copy_chunk_num ?
+			buf->stop_copy_chunk_num - 1 : 0;
 
-			header_buf = migf->buf_header[header_idx];
-			migf->buf_header[header_idx] = NULL;
-		}
+		header_buf = migf->buf_header[header_idx];
+		migf->buf_header[header_idx] = NULL;
+	}
 
-		if (!header_buf) {
-			header_buf = mlx5vf_get_data_buffer(migf,
-				sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-			if (IS_ERR(header_buf)) {
-				err = PTR_ERR(header_buf);
-				goto err_free;
-			}
+	if (!header_buf) {
+		header_buf = mlx5vf_get_data_buffer(migf,
+			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+		if (IS_ERR(header_buf)) {
+			err = PTR_ERR(header_buf);
+			goto err_free;
 		}
 	}
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 707393df36c4..df421dc6de04 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -13,9 +13,6 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
-#define MLX5VF_PRE_COPY_SUPP(mvdev) \
-	((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
-
 enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
 	MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
 };
 
 enum mlx5_vf_load_state {
-	MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
 	MLX5_VF_LOAD_STATE_READ_HEADER,
 	MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
 	MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
 mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
 		       size_t length, enum dma_data_direction dma_dir);
 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-			       unsigned int npages);
 struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 				       unsigned long offset);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 3982fcf60cf2..61d9b0f9146d 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 	return NULL;
 }
 
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-			       unsigned int npages)
-{
-	unsigned int to_alloc = npages;
-	struct page **page_list;
-	unsigned long filled;
-	unsigned int to_fill;
-	int ret;
-
-	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
-	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
-	if (!page_list)
-		return -ENOMEM;
-
-	do {
-		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
-						page_list);
-		if (!filled) {
-			ret = -ENOMEM;
-			goto err;
-		}
-		to_alloc -= filled;
-		ret = sg_alloc_append_table_from_pages(
-			&buf->table, page_list, filled, 0,
-			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
-			GFP_KERNEL_ACCOUNT);
-
-		if (ret)
-			goto err;
-		buf->allocated_length += filled * PAGE_SIZE;
-		/* clean input for another bulk allocation */
-		memset(page_list, 0, filled * sizeof(*page_list));
-		to_fill = min_t(unsigned int, to_alloc,
-				PAGE_SIZE / sizeof(*page_list));
-	} while (to_alloc > 0);
-
-	kvfree(page_list);
-	return 0;
-
-err:
-	kvfree(page_list);
-	return ret;
-}
-
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
 {
 	mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
 	return 0;
 }
 
-static int
-mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
-				   loff_t requested_length,
-				   const char __user **buf, size_t *len,
-				   loff_t *pos, ssize_t *done)
-{
-	int ret;
-
-	if (requested_length > MAX_LOAD_SIZE)
-		return -ENOMEM;
-
-	if (vhca_buf->allocated_length < requested_length) {
-		ret = mlx5vf_add_migration_pages(
-			vhca_buf,
-			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
-				     PAGE_SIZE));
-		if (ret)
-			return ret;
-	}
-
-	while (*len) {
-		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
-						    done);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static ssize_t
 mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
 			 struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 			migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
 			break;
 		}
-		case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
-			ret = mlx5vf_resume_read_image_no_header(vhca_buf,
-						requested_length,
-						&buf, &len, pos, &done);
-			if (ret)
-				goto out_unlock;
-			break;
 		case MLX5_VF_LOAD_STATE_READ_IMAGE:
 			ret = mlx5vf_resume_read_image(migf, vhca_buf,
 						migf->record_size,
@@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	}
 
 	migf->buf[0] = buf;
-	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-		buf = mlx5vf_alloc_data_buffer(migf,
-			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-		if (IS_ERR(buf)) {
-			ret = PTR_ERR(buf);
-			goto out_buf;
-		}
-
-		migf->buf_header[0] = buf;
-		migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
-	} else {
-		/* Initial state will be to read the image */
-		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
+	buf = mlx5vf_alloc_data_buffer(migf,
+		sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto out_buf;
 	}
 
+	migf->buf_header[0] = buf;
+	migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	INIT_LIST_HEAD(&migf->buf_list);
@@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-		if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
-			ret = mlx5vf_cmd_load_vhca_state(mvdev,
-							 mvdev->resuming_migf,
-							 mvdev->resuming_migf->buf[0]);
-			if (ret)
-				return ERR_PTR(ret);
-		}
 		mlx5vf_disable_fds(mvdev, NULL);
 		return NULL;
 	}
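
In uAPI terms, enforcing PRE_COPY matters for downtime because userspace
can stream most of the device state while the guest still runs and stop
it only for the final delta. A minimal sketch of the save-side state
transitions (assuming an already-open VFIO device fd; set_state is a
hypothetical helper, while the ioctl, structs and state names are the
standard VFIO migration uAPI):

#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Move the device to a new migration state. On an arc that opens a
 * migration stream (e.g. RUNNING -> PRE_COPY, or STOP -> RESUMING) the
 * kernel returns a data fd; on other arcs data_fd comes back as -1.
 */
static int set_state(int device_fd, __u32 state)
{
	__u64 buf[2] = {};	/* 8-byte header + device_state/data_fd */
	struct vfio_device_feature *hdr = (void *)buf;
	struct vfio_device_feature_mig_state *mig = (void *)hdr->data;

	hdr->argsz = sizeof(buf);
	hdr->flags = VFIO_DEVICE_FEATURE_SET |
		     VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
	mig->device_state = state;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, hdr))
		return -1;

	return mig->data_fd;
}

Typical flow: set_state(fd, VFIO_DEVICE_STATE_PRE_COPY) and read() the
returned data fd while the guest keeps running, then
set_state(fd, VFIO_DEVICE_STATE_STOP_COPY) and keep reading the same fd
for the (now small) remaining delta with the guest stopped. Without
PRE_COPY, the whole image is read inside the stopped window, which is
the downtime the commit message refers to.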