diff mbox series

[v4,1/5] hisi_acc_vfio_pci: fix XQE dma address error

Message ID 20250225062757.19692-2-liulongfang@huawei.com (mailing list archive)
State New
Headers show
Series bugfix some driver issues | expand

Commit Message

Longfang Liu Feb. 25, 2025, 6:27 a.m. UTC
The dma addresses of EQE and AEQE are wrong after migration and
results in guest kernel-mode encryption services  failure.
Comparing the definition of hardware registers, we found that
there was an error when the data read from the register was
combined into an address. Therefore, the address combination
sequence needs to be corrected.

Even after fixing the above problem, we still have an issue
where the Guest from an old kernel can get migrated to
new kernel and may result in wrong data.

In order to ensure that the address is correct after migration,
if an old magic number is detected, the dma address needs to be
updated.

Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 40 ++++++++++++++++---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    | 14 ++++++-
 2 files changed, 46 insertions(+), 8 deletions(-)

Comments

Alex Williamson Feb. 26, 2025, 12:09 a.m. UTC | #1
On Tue, 25 Feb 2025 14:27:53 +0800
Longfang Liu <liulongfang@huawei.com> wrote:

> The dma addresses of EQE and AEQE are wrong after migration and
> results in guest kernel-mode encryption services  failure.
> Comparing the definition of hardware registers, we found that
> there was an error when the data read from the register was
> combined into an address. Therefore, the address combination
> sequence needs to be corrected.
> 
> Even after fixing the above problem, we still have an issue
> where the Guest from an old kernel can get migrated to
> new kernel and may result in wrong data.
> 
> In order to ensure that the address is correct after migration,
> if an old magic number is detected, the dma address needs to be
> updated.
> 
> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
> ---
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 40 ++++++++++++++++---
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    | 14 ++++++-
>  2 files changed, 46 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> index 451c639299eb..35316984089b 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> @@ -350,6 +350,31 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
>  	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
>  }
>  
> +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
> +{
> +	switch (vf_data->acc_magic) {
> +	case ACC_DEV_MAGIC_V2:
> +		if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
> +		    vf_data->minor_ver < ACC_DRV_MINOR_VER)
> +			dev_info(dev, "migration driver version not match!\n");
> +			return -EINVAL;
> +		break;

What's your major/minor update strategy?

Note that minor_ver is a u16 and ACC_DRV_MINOR_VER is defined as 0, so
testing less than 0 against an unsigned is useless.

Arguably testing major and minor independently is pretty useless too.

You're defining what a future "old" driver version will accept, which
is very nearly anything, so any breaking change *again* requires a new
magic, so we're accomplishing very little here.

Maybe you want to consider a strategy where you'd increment the major
number for a breaking change and minor for a compatible feature.  In
that case you'd want to verify the major_ver matches ACC_DRV_MAJOR_VER
exactly and minor_ver would be more of a feature level.

> +	case ACC_DEV_MAGIC_V1:
> +		/* Correct dma address */
> +		vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
> +		vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> +		vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
> +		vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
> +		vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> +		vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>  			     struct hisi_acc_vf_migration_file *migf)
>  {
> @@ -363,7 +388,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>  	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
>  		return 0;
>  
> -	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
> +	ret = vf_qm_version_check(vf_data, dev);
> +	if (ret) {
>  		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
>  		return -EINVAL;
>  	}
> @@ -418,7 +444,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>  	int vf_id = hisi_acc_vdev->vf_id;
>  	int ret;
>  
> -	vf_data->acc_magic = ACC_DEV_MAGIC;
> +	vf_data->acc_magic = ACC_DEV_MAGIC_V2;
> +	vf_data->major_ver = ACC_DRV_MAR;
> +	vf_data->minor_ver = ACC_DRV_MIN;

Where are "MAR" and "MIN" defined?  I can't see how this would even
compile.  Thanks,

Alex

>  	/* Save device id */
>  	vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
>  
> @@ -496,12 +524,12 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
>  		return -EINVAL;
>  
>  	/* Every reg is 32 bit, the dma address is 64 bit. */
> -	vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
> +	vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
>  	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> -	vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
> -	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
> +	vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
> +	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
>  	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> -	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
> +	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
>  
>  	/* Through SQC_BT/CQC_BT to get sqc and cqc address */
>  	ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> index 245d7537b2bc..91002ceeebc1 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> @@ -39,6 +39,9 @@
>  #define QM_REG_ADDR_OFFSET	0x0004
>  
>  #define QM_XQC_ADDR_OFFSET	32U
> +#define QM_XQC_ADDR_LOW	0x1
> +#define QM_XQC_ADDR_HIGH	0x2
> +
>  #define QM_VF_AEQ_INT_MASK	0x0004
>  #define QM_VF_EQ_INT_MASK	0x000c
>  #define QM_IFC_INT_SOURCE_V	0x0020
> @@ -50,10 +53,15 @@
>  #define QM_EQC_DW0		0X8000
>  #define QM_AEQC_DW0		0X8020
>  
> +#define ACC_DRV_MAJOR_VER 1
> +#define ACC_DRV_MINOR_VER 0
> +
> +#define ACC_DEV_MAGIC_V1	0XCDCDCDCDFEEDAACC
> +#define ACC_DEV_MAGIC_V2	0xAACCFEEDDECADEDE
> +
>  struct acc_vf_data {
>  #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
>  	/* QM match information */
> -#define ACC_DEV_MAGIC	0XCDCDCDCDFEEDAACC
>  	u64 acc_magic;
>  	u32 qp_num;
>  	u32 dev_id;
> @@ -61,7 +69,9 @@ struct acc_vf_data {
>  	u32 qp_base;
>  	u32 vf_qm_state;
>  	/* QM reserved match information */
> -	u32 qm_rsv_state[3];
> +	u16 major_ver;
> +	u16 minor_ver;
> +	u32 qm_rsv_state[2];
>  
>  	/* QM RW regs */
>  	u32 aeq_int_mask;
Shameerali Kolothum Thodi Feb. 26, 2025, 8:11 a.m. UTC | #2
> -----Original Message-----
> From: Alex Williamson <alex.williamson@redhat.com>
> Sent: Wednesday, February 26, 2025 12:10 AM
> To: liulongfang <liulongfang@huawei.com>
> Cc: jgg@nvidia.com; Shameerali Kolothum Thodi
> <shameerali.kolothum.thodi@huawei.com>; Jonathan Cameron
> <jonathan.cameron@huawei.com>; kvm@vger.kernel.org; linux-
> kernel@vger.kernel.org; linuxarm@openeuler.org
> Subject: Re: [PATCH v4 1/5] hisi_acc_vfio_pci: fix XQE dma address error
> 
> On Tue, 25 Feb 2025 14:27:53 +0800
> Longfang Liu <liulongfang@huawei.com> wrote:
> 
> > The dma addresses of EQE and AEQE are wrong after migration and
> > results in guest kernel-mode encryption services  failure.
> > Comparing the definition of hardware registers, we found that
> > there was an error when the data read from the register was
> > combined into an address. Therefore, the address combination
> > sequence needs to be corrected.
> >
> > Even after fixing the above problem, we still have an issue
> > where the Guest from an old kernel can get migrated to
> > new kernel and may result in wrong data.
> >
> > In order to ensure that the address is correct after migration,
> > if an old magic number is detected, the dma address needs to be
> > updated.
> >
> > Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live
> migration")
> > Signed-off-by: Longfang Liu <liulongfang@huawei.com>
> > ---
> >  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 40 ++++++++++++++++---
> >  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    | 14 ++++++-
> >  2 files changed, 46 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> > index 451c639299eb..35316984089b 100644
> > --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> > +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> > @@ -350,6 +350,31 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
> >  	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
> >  }
> >
> > +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device
> *dev)
> > +{
> > +	switch (vf_data->acc_magic) {
> > +	case ACC_DEV_MAGIC_V2:
> > +		if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
> > +		    vf_data->minor_ver < ACC_DRV_MINOR_VER)
> > +			dev_info(dev, "migration driver version not
> match!\n");
> > +			return -EINVAL;
> > +		break;
> 
> What's your major/minor update strategy?
> 
> Note that minor_ver is a u16 and ACC_DRV_MINOR_VER is defined as 0, so
> testing less than 0 against an unsigned is useless.
> 
> Arguably testing major and minor independently is pretty useless too.
> 
> You're defining what a future "old" driver version will accept, which
> is very nearly anything, so any breaking change *again* requires a new
> magic, so we're accomplishing very little here.
> 
> Maybe you want to consider a strategy where you'd increment the major
> number for a breaking change and minor for a compatible feature.  In
> that case you'd want to verify the major_ver matches
> ACC_DRV_MAJOR_VER
> exactly and minor_ver would be more of a feature level.

Agree. I think the above check should be just major_ver != ACC_DRV_MAJOR_VER
and we can make use of minor version whenever we need a specific handling for
a feature support.

Also I think it would be good to print the version info above in case of mismatch.

> 
> > +	case ACC_DEV_MAGIC_V1:
> > +		/* Correct dma address */
> > +		vf_data->eqe_dma = vf_data-
> >qm_eqc_dw[QM_XQC_ADDR_HIGH];
> > +		vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> > +		vf_data->eqe_dma |= vf_data-
> >qm_eqc_dw[QM_XQC_ADDR_LOW];
> > +		vf_data->aeqe_dma = vf_data-
> >qm_aeqc_dw[QM_XQC_ADDR_HIGH];
> > +		vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> > +		vf_data->aeqe_dma |= vf_data-
> >qm_aeqc_dw[QM_XQC_ADDR_LOW];
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int vf_qm_check_match(struct hisi_acc_vf_core_device
> *hisi_acc_vdev,
> >  			     struct hisi_acc_vf_migration_file *migf)
> >  {
> > @@ -363,7 +388,8 @@ static int vf_qm_check_match(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> >  	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev-
> >match_done)
> >  		return 0;
> >
> > -	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
> > +	ret = vf_qm_version_check(vf_data, dev);
> > +	if (ret) {
> >  		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
> >  		return -EINVAL;
> >  	}
> > @@ -418,7 +444,9 @@ static int vf_qm_get_match_data(struct
> hisi_acc_vf_core_device *hisi_acc_vdev,
> >  	int vf_id = hisi_acc_vdev->vf_id;
> >  	int ret;
> >
> > -	vf_data->acc_magic = ACC_DEV_MAGIC;
> > +	vf_data->acc_magic = ACC_DEV_MAGIC_V2;
> > +	vf_data->major_ver = ACC_DRV_MAR;
> > +	vf_data->minor_ver = ACC_DRV_MIN;
> 
> Where are "MAR" and "MIN" defined?  I can't see how this would even
> compile.  Thanks,

Yes. Please  make sure to do a compile test and run basic sanity tested even if you
think the changes are minor. Chances are there that you will miss out things like
this.

Thanks,
Shameer
Longfang Liu Feb. 26, 2025, 11:21 a.m. UTC | #3
On 2025/2/25 14:27, Longfang Liu wrote:
> The dma addresses of EQE and AEQE are wrong after migration and
> results in guest kernel-mode encryption services  failure.
> Comparing the definition of hardware registers, we found that
> there was an error when the data read from the register was
> combined into an address. Therefore, the address combination
> sequence needs to be corrected.
> 
> Even after fixing the above problem, we still have an issue
> where the Guest from an old kernel can get migrated to
> new kernel and may result in wrong data.
> 
> In order to ensure that the address is correct after migration,
> if an old magic number is detected, the dma address needs to be
> updated.
> 
> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
> ---
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 40 ++++++++++++++++---
>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    | 14 ++++++-
>  2 files changed, 46 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> index 451c639299eb..35316984089b 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> @@ -350,6 +350,31 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
>  	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
>  }
>  
> +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
> +{
> +	switch (vf_data->acc_magic) {
> +	case ACC_DEV_MAGIC_V2:
> +		if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
> +		    vf_data->minor_ver < ACC_DRV_MINOR_VER)
> +			dev_info(dev, "migration driver version not match!\n");
> +			return -EINVAL;
> +		break;
> +	case ACC_DEV_MAGIC_V1:
> +		/* Correct dma address */
> +		vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
> +		vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> +		vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
> +		vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
> +		vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> +		vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>  			     struct hisi_acc_vf_migration_file *migf)
>  {
> @@ -363,7 +388,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>  	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
>  		return 0;
>  
> -	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
> +	ret = vf_qm_version_check(vf_data, dev);
> +	if (ret) {
>  		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
>  		return -EINVAL;
>  	}
> @@ -418,7 +444,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>  	int vf_id = hisi_acc_vdev->vf_id;
>  	int ret;
>  
> -	vf_data->acc_magic = ACC_DEV_MAGIC;
> +	vf_data->acc_magic = ACC_DEV_MAGIC_V2;
> +	vf_data->major_ver = ACC_DRV_MAR;
> +	vf_data->minor_ver = ACC_DRV_MIN;

The values ​​here should be ACC_DRV_MAJOR_VER and ACC_DRV_MINOR_VER
I will fix this in the next version.

Thanks.
Longfang.

>  	/* Save device id */
>  	vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
>  
> @@ -496,12 +524,12 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
>  		return -EINVAL;
>  
>  	/* Every reg is 32 bit, the dma address is 64 bit. */
> -	vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
> +	vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
>  	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
> -	vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
> -	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
> +	vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
> +	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
>  	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
> -	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
> +	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
>  
>  	/* Through SQC_BT/CQC_BT to get sqc and cqc address */
>  	ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> index 245d7537b2bc..91002ceeebc1 100644
> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
> @@ -39,6 +39,9 @@
>  #define QM_REG_ADDR_OFFSET	0x0004
>  
>  #define QM_XQC_ADDR_OFFSET	32U
> +#define QM_XQC_ADDR_LOW	0x1
> +#define QM_XQC_ADDR_HIGH	0x2
> +
>  #define QM_VF_AEQ_INT_MASK	0x0004
>  #define QM_VF_EQ_INT_MASK	0x000c
>  #define QM_IFC_INT_SOURCE_V	0x0020
> @@ -50,10 +53,15 @@
>  #define QM_EQC_DW0		0X8000
>  #define QM_AEQC_DW0		0X8020
>  
> +#define ACC_DRV_MAJOR_VER 1
> +#define ACC_DRV_MINOR_VER 0
> +
> +#define ACC_DEV_MAGIC_V1	0XCDCDCDCDFEEDAACC
> +#define ACC_DEV_MAGIC_V2	0xAACCFEEDDECADEDE
> +
>  struct acc_vf_data {
>  #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
>  	/* QM match information */
> -#define ACC_DEV_MAGIC	0XCDCDCDCDFEEDAACC
>  	u64 acc_magic;
>  	u32 qp_num;
>  	u32 dev_id;
> @@ -61,7 +69,9 @@ struct acc_vf_data {
>  	u32 qp_base;
>  	u32 vf_qm_state;
>  	/* QM reserved match information */
> -	u32 qm_rsv_state[3];
> +	u16 major_ver;
> +	u16 minor_ver;
> +	u32 qm_rsv_state[2];
>  
>  	/* QM RW regs */
>  	u32 aeq_int_mask;
>
Longfang Liu Feb. 26, 2025, 11:41 a.m. UTC | #4
On 2025/2/26 16:11, Shameerali Kolothum Thodi wrote:
> 
> 
>> -----Original Message-----
>> From: Alex Williamson <alex.williamson@redhat.com>
>> Sent: Wednesday, February 26, 2025 12:10 AM
>> To: liulongfang <liulongfang@huawei.com>
>> Cc: jgg@nvidia.com; Shameerali Kolothum Thodi
>> <shameerali.kolothum.thodi@huawei.com>; Jonathan Cameron
>> <jonathan.cameron@huawei.com>; kvm@vger.kernel.org; linux-
>> kernel@vger.kernel.org; linuxarm@openeuler.org
>> Subject: Re: [PATCH v4 1/5] hisi_acc_vfio_pci: fix XQE dma address error
>>
>> On Tue, 25 Feb 2025 14:27:53 +0800
>> Longfang Liu <liulongfang@huawei.com> wrote:
>>
>>> The dma addresses of EQE and AEQE are wrong after migration and
>>> results in guest kernel-mode encryption services  failure.
>>> Comparing the definition of hardware registers, we found that
>>> there was an error when the data read from the register was
>>> combined into an address. Therefore, the address combination
>>> sequence needs to be corrected.
>>>
>>> Even after fixing the above problem, we still have an issue
>>> where the Guest from an old kernel can get migrated to
>>> new kernel and may result in wrong data.
>>>
>>> In order to ensure that the address is correct after migration,
>>> if an old magic number is detected, the dma address needs to be
>>> updated.
>>>
>>> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live
>> migration")
>>> Signed-off-by: Longfang Liu <liulongfang@huawei.com>
>>> ---
>>>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 40 ++++++++++++++++---
>>>  .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    | 14 ++++++-
>>>  2 files changed, 46 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>> b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>>> index 451c639299eb..35316984089b 100644
>>> --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>>> +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
>>> @@ -350,6 +350,31 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
>>>  	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
>>>  }
>>>
>>> +static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device
>> *dev)
>>> +{
>>> +	switch (vf_data->acc_magic) {
>>> +	case ACC_DEV_MAGIC_V2:
>>> +		if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
>>> +		    vf_data->minor_ver < ACC_DRV_MINOR_VER)
>>> +			dev_info(dev, "migration driver version not
>> match!\n");
>>> +			return -EINVAL;
>>> +		break;
>>
>> What's your major/minor update strategy?
>>
>> Note that minor_ver is a u16 and ACC_DRV_MINOR_VER is defined as 0, so
>> testing less than 0 against an unsigned is useless.
>>
>> Arguably testing major and minor independently is pretty useless too.
>>
>> You're defining what a future "old" driver version will accept, which
>> is very nearly anything, so any breaking change *again* requires a new
>> magic, so we're accomplishing very little here.
>>
>> Maybe you want to consider a strategy where you'd increment the major
>> number for a breaking change and minor for a compatible feature.  In
>> that case you'd want to verify the major_ver matches
>> ACC_DRV_MAJOR_VER
>> exactly and minor_ver would be more of a feature level.
> 
> Agree. I think the above check should be just major_ver != ACC_DRV_MAJOR_VER
> and we can make use of minor version whenever we need a specific handling for
> a feature support.
>

Well, that's a good way.
We only use minor_ver to record important updates of the driver. When there is
an important update, minor_ver increases by 1.
Major_ver is used to distinguish migration versions. After major_ver is updated,
minor_ver returns to 0.

Therefore, we can change it to only check major_ver.

> Also I think it would be good to print the version info above in case of mismatch.
>

OK.
Thanks.
Longfang.

>>
>>> +	case ACC_DEV_MAGIC_V1:
>>> +		/* Correct dma address */
>>> +		vf_data->eqe_dma = vf_data-
>>> qm_eqc_dw[QM_XQC_ADDR_HIGH];
>>> +		vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
>>> +		vf_data->eqe_dma |= vf_data-
>>> qm_eqc_dw[QM_XQC_ADDR_LOW];
>>> +		vf_data->aeqe_dma = vf_data-
>>> qm_aeqc_dw[QM_XQC_ADDR_HIGH];
>>> +		vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
>>> +		vf_data->aeqe_dma |= vf_data-
>>> qm_aeqc_dw[QM_XQC_ADDR_LOW];
>>> +		break;
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>  static int vf_qm_check_match(struct hisi_acc_vf_core_device
>> *hisi_acc_vdev,
>>>  			     struct hisi_acc_vf_migration_file *migf)
>>>  {
>>> @@ -363,7 +388,8 @@ static int vf_qm_check_match(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>>>  	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev-
>>> match_done)
>>>  		return 0;
>>>
>>> -	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
>>> +	ret = vf_qm_version_check(vf_data, dev);
>>> +	if (ret) {
>>>  		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
>>>  		return -EINVAL;
>>>  	}
>>> @@ -418,7 +444,9 @@ static int vf_qm_get_match_data(struct
>> hisi_acc_vf_core_device *hisi_acc_vdev,
>>>  	int vf_id = hisi_acc_vdev->vf_id;
>>>  	int ret;
>>>
>>> -	vf_data->acc_magic = ACC_DEV_MAGIC;
>>> +	vf_data->acc_magic = ACC_DEV_MAGIC_V2;
>>> +	vf_data->major_ver = ACC_DRV_MAR;
>>> +	vf_data->minor_ver = ACC_DRV_MIN;
>>
>> Where are "MAR" and "MIN" defined?  I can't see how this would even
>> compile.  Thanks,
> 
> Yes. Please  make sure to do a compile test and run basic sanity tested even if you
> think the changes are minor. Chances are there that you will miss out things like
> this.
> 
> Thanks,
> Shameer
>  
> .
>
kernel test robot Feb. 27, 2025, 6 p.m. UTC | #5
Hi Longfang,

kernel test robot noticed the following build warnings:

[auto build test WARNING on awilliam-vfio/next]
[also build test WARNING on awilliam-vfio/for-linus linus/master v6.14-rc4 next-20250227]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Longfang-Liu/hisi_acc_vfio_pci-fix-XQE-dma-address-error/20250225-143347
base:   https://github.com/awilliam/linux-vfio.git next
patch link:    https://lore.kernel.org/r/20250225062757.19692-2-liulongfang%40huawei.com
patch subject: [PATCH v4 1/5] hisi_acc_vfio_pci: fix XQE dma address error
config: loongarch-allyesconfig (https://download.01.org/0day-ci/archive/20250228/202502280126.kuSX5nFF-lkp@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250228/202502280126.kuSX5nFF-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202502280126.kuSX5nFF-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c: In function 'vf_qm_version_check':
>> drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:357:17: warning: this 'if' clause does not guard... [-Wmisleading-indentation]
     357 |                 if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
         |                 ^~
   drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:360:25: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'
     360 |                         return -EINVAL;
         |                         ^~~~~~
   drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c: In function 'vf_qm_get_match_data':
   drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:448:30: error: 'ACC_DRV_MAR' undeclared (first use in this function)
     448 |         vf_data->major_ver = ACC_DRV_MAR;
         |                              ^~~~~~~~~~~
   drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:448:30: note: each undeclared identifier is reported only once for each function it appears in
   drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:449:30: error: 'ACC_DRV_MIN' undeclared (first use in this function)
     449 |         vf_data->minor_ver = ACC_DRV_MIN;
         |                              ^~~~~~~~~~~


vim +/if +357 drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c

   352	
   353	static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
   354	{
   355		switch (vf_data->acc_magic) {
   356		case ACC_DEV_MAGIC_V2:
 > 357			if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
   358			    vf_data->minor_ver < ACC_DRV_MINOR_VER)
   359				dev_info(dev, "migration driver version not match!\n");
   360				return -EINVAL;
   361			break;
   362		case ACC_DEV_MAGIC_V1:
   363			/* Correct dma address */
   364			vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
   365			vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
   366			vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
   367			vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
   368			vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
   369			vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
   370			break;
   371		default:
   372			return -EINVAL;
   373		}
   374	
   375		return 0;
   376	}
   377
kernel test robot Feb. 28, 2025, 11:55 a.m. UTC | #6
Hi Longfang,

kernel test robot noticed the following build errors:

[auto build test ERROR on awilliam-vfio/next]
[also build test ERROR on awilliam-vfio/for-linus linus/master v6.14-rc4 next-20250227]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Longfang-Liu/hisi_acc_vfio_pci-fix-XQE-dma-address-error/20250225-143347
base:   https://github.com/awilliam/linux-vfio.git next
patch link:    https://lore.kernel.org/r/20250225062757.19692-2-liulongfang%40huawei.com
patch subject: [PATCH v4 1/5] hisi_acc_vfio_pci: fix XQE dma address error
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20250228/202502281952.Z9JQ8jcK-lkp@intel.com/config)
compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250228/202502281952.Z9JQ8jcK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202502281952.Z9JQ8jcK-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:9:
   In file included from include/linux/hisi_acc_qm.h:10:
   In file included from include/linux/pci.h:1644:
   In file included from include/linux/dmapool.h:14:
   In file included from include/linux/scatterlist.h:8:
   In file included from include/linux/mm.h:2224:
   include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     505 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     512 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:524:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     524 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     525 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
>> drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:448:23: error: use of undeclared identifier 'ACC_DRV_MAR'
     448 |         vf_data->major_ver = ACC_DRV_MAR;
         |                              ^
>> drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:449:23: error: use of undeclared identifier 'ACC_DRV_MIN'
     449 |         vf_data->minor_ver = ACC_DRV_MIN;
         |                              ^
   3 warnings and 2 errors generated.


vim +/ACC_DRV_MAR +448 drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c

   438	
   439	static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
   440					struct acc_vf_data *vf_data)
   441	{
   442		struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
   443		struct device *dev = &pf_qm->pdev->dev;
   444		int vf_id = hisi_acc_vdev->vf_id;
   445		int ret;
   446	
   447		vf_data->acc_magic = ACC_DEV_MAGIC_V2;
 > 448		vf_data->major_ver = ACC_DRV_MAR;
 > 449		vf_data->minor_ver = ACC_DRV_MIN;
   450		/* Save device id */
   451		vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
   452	
   453		/* VF qp num save from PF */
   454		ret = pf_qm_get_qp_num(pf_qm, vf_id, &vf_data->qp_base);
   455		if (ret <= 0) {
   456			dev_err(dev, "failed to get vft qp nums!\n");
   457			return -EINVAL;
   458		}
   459	
   460		vf_data->qp_num = ret;
   461	
   462		/* VF isolation state save from PF */
   463		ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
   464		if (ret) {
   465			dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n");
   466			return ret;
   467		}
   468	
   469		return 0;
   470	}
   471
Longfang Liu March 3, 2025, 11:14 a.m. UTC | #7
On 2025/2/28 19:55, kernel test robot wrote:
> Hi Longfang,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on awilliam-vfio/next]
> [also build test ERROR on awilliam-vfio/for-linus linus/master v6.14-rc4 next-20250227]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Longfang-Liu/hisi_acc_vfio_pci-fix-XQE-dma-address-error/20250225-143347
> base:   https://github.com/awilliam/linux-vfio.git next
> patch link:    https://lore.kernel.org/r/20250225062757.19692-2-liulongfang%40huawei.com
> patch subject: [PATCH v4 1/5] hisi_acc_vfio_pci: fix XQE dma address error
> config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20250228/202502281952.Z9JQ8jcK-lkp@intel.com/config)
> compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250228/202502281952.Z9JQ8jcK-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202502281952.Z9JQ8jcK-lkp@intel.com/
> 
> All errors (new ones prefixed by >>):
> 
>    In file included from drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:9:
>    In file included from include/linux/hisi_acc_qm.h:10:
>    In file included from include/linux/pci.h:1644:
>    In file included from include/linux/dmapool.h:14:
>    In file included from include/linux/scatterlist.h:8:
>    In file included from include/linux/mm.h:2224:
>    include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
>      504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
>          |                            ~~~~~~~~~~~~~~~~~~~~~ ^
>      505 |                            item];
>          |                            ~~~~
>    include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
>      511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
>          |                            ~~~~~~~~~~~~~~~~~~~~~ ^
>      512 |                            NR_VM_NUMA_EVENT_ITEMS +
>          |                            ~~~~~~~~~~~~~~~~~~~~~~
>    include/linux/vmstat.h:524:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
>      524 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
>          |                            ~~~~~~~~~~~~~~~~~~~~~ ^
>      525 |                            NR_VM_NUMA_EVENT_ITEMS +
>          |                            ~~~~~~~~~~~~~~~~~~~~~~
>>> drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:448:23: error: use of undeclared identifier 'ACC_DRV_MAR'
>      448 |         vf_data->major_ver = ACC_DRV_MAR;
>          |                              ^
>>> drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c:449:23: error: use of undeclared identifier 'ACC_DRV_MIN'
>      449 |         vf_data->minor_ver = ACC_DRV_MIN;
>          |                              ^
>    3 warnings and 2 errors generated.
> 
> 
> vim +/ACC_DRV_MAR +448 drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
> 
>    438	
>    439	static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
>    440					struct acc_vf_data *vf_data)
>    441	{
>    442		struct hisi_qm *pf_qm = hisi_acc_vdev->pf_qm;
>    443		struct device *dev = &pf_qm->pdev->dev;
>    444		int vf_id = hisi_acc_vdev->vf_id;
>    445		int ret;
>    446	
>    447		vf_data->acc_magic = ACC_DEV_MAGIC_V2;
>  > 448		vf_data->major_ver = ACC_DRV_MAR;
>  > 449		vf_data->minor_ver = ACC_DRV_MIN;
>    450		/* Save device id */
>    451		vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
>    452	
>    453		/* VF qp num save from PF */
>    454		ret = pf_qm_get_qp_num(pf_qm, vf_id, &vf_data->qp_base);
>    455		if (ret <= 0) {
>    456			dev_err(dev, "failed to get vft qp nums!\n");
>    457			return -EINVAL;
>    458		}
>    459	
>    460		vf_data->qp_num = ret;
>    461	
>    462		/* VF isolation state save from PF */
>    463		ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1);
>    464		if (ret) {
>    465			dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n");
>    466			return ret;
>    467		}
>    468	
>    469		return 0;
>    470	}
>    471	
> 

Thank you for your test, I will fix it in the next version.

Thanks.
Longfang.
diff mbox series

Patch

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 451c639299eb..35316984089b 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -350,6 +350,31 @@  static int vf_qm_func_stop(struct hisi_qm *qm)
 	return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
 }
 
+static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
+{
+	switch (vf_data->acc_magic) {
+	case ACC_DEV_MAGIC_V2:
+		if (vf_data->major_ver < ACC_DRV_MAJOR_VER ||
+		    vf_data->minor_ver < ACC_DRV_MINOR_VER)
+			dev_info(dev, "migration driver version not match!\n");
+			return -EINVAL;
+		break;
+	case ACC_DEV_MAGIC_V1:
+		/* Correct dma address */
+		vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
+		vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+		vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+		vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
+		vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+		vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 			     struct hisi_acc_vf_migration_file *migf)
 {
@@ -363,7 +388,8 @@  static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
 		return 0;
 
-	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+	ret = vf_qm_version_check(vf_data, dev);
+	if (ret) {
 		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
 		return -EINVAL;
 	}
@@ -418,7 +444,9 @@  static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	int vf_id = hisi_acc_vdev->vf_id;
 	int ret;
 
-	vf_data->acc_magic = ACC_DEV_MAGIC;
+	vf_data->acc_magic = ACC_DEV_MAGIC_V2;
+	vf_data->major_ver = ACC_DRV_MAR;
+	vf_data->minor_ver = ACC_DRV_MIN;
 	/* Save device id */
 	vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
 
@@ -496,12 +524,12 @@  static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
 		return -EINVAL;
 
 	/* Every reg is 32 bit, the dma address is 64 bit. */
-	vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
+	vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
 	vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
-	vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
-	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
+	vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+	vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
 	vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
-	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
+	vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
 
 	/* Through SQC_BT/CQC_BT to get sqc and cqc address */
 	ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 245d7537b2bc..91002ceeebc1 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -39,6 +39,9 @@ 
 #define QM_REG_ADDR_OFFSET	0x0004
 
 #define QM_XQC_ADDR_OFFSET	32U
+#define QM_XQC_ADDR_LOW	0x1
+#define QM_XQC_ADDR_HIGH	0x2
+
 #define QM_VF_AEQ_INT_MASK	0x0004
 #define QM_VF_EQ_INT_MASK	0x000c
 #define QM_IFC_INT_SOURCE_V	0x0020
@@ -50,10 +53,15 @@ 
 #define QM_EQC_DW0		0X8000
 #define QM_AEQC_DW0		0X8020
 
+#define ACC_DRV_MAJOR_VER 1
+#define ACC_DRV_MINOR_VER 0
+
+#define ACC_DEV_MAGIC_V1	0XCDCDCDCDFEEDAACC
+#define ACC_DEV_MAGIC_V2	0xAACCFEEDDECADEDE
+
 struct acc_vf_data {
 #define QM_MATCH_SIZE offsetofend(struct acc_vf_data, qm_rsv_state)
 	/* QM match information */
-#define ACC_DEV_MAGIC	0XCDCDCDCDFEEDAACC
 	u64 acc_magic;
 	u32 qp_num;
 	u32 dev_id;
@@ -61,7 +69,9 @@  struct acc_vf_data {
 	u32 qp_base;
 	u32 vf_qm_state;
 	/* QM reserved match information */
-	u32 qm_rsv_state[3];
+	u16 major_ver;
+	u16 minor_ver;
+	u32 qm_rsv_state[2];
 
 	/* QM RW regs */
 	u32 aeq_int_mask;