Message ID | 20220303230131.2103-6-shameerali.kolothum.thodi@huawei.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Herbert Xu |
Headers | show |
Series | vfio/hisilicon: add ACC live migration driver | expand |
On 2022/3/4 7:01, Shameer Kolothum wrote: > HiSilicon ACC VF device BAR2 region consists of both functional > register space and migration control register space. From a > security point of view, it's not advisable to export the migration > control region to Guest. > > Hence, introduce a separate struct vfio_device_ops for migration > support which will override the ioctl/read/write/mmap methods to > hide the migration region and limit the access only to the > functional register space. > > This will be used in subsequent patches when we add migration > support to the driver. > > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> Reviewed-by: Longfang Liu <liulongfang@huawei.com> Thanks, Longfang > --- > .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 126 ++++++++++++++++++ > 1 file changed, 126 insertions(+) > > diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c > index 8129c3457b3b..582ee4fa4109 100644 > --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c > +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c > @@ -13,6 +13,119 @@ > #include <linux/vfio.h> > #include <linux/vfio_pci_core.h> > > +static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev, > + size_t count, loff_t *ppos, > + size_t *new_count) > +{ > + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); > + struct vfio_pci_core_device *vdev = > + container_of(core_vdev, struct vfio_pci_core_device, vdev); > + > + if (index == VFIO_PCI_BAR2_REGION_INDEX) { > + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; > + resource_size_t end = pci_resource_len(vdev->pdev, index) / 2; > + > + /* Check if access is for migration control region */ > + if (pos >= end) > + return -EINVAL; > + > + *new_count = min(count, (size_t)(end - pos)); > + } > + > + return 0; > +} > + > +static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev, > + struct vm_area_struct *vma) > +{ > + struct vfio_pci_core_device *vdev = > + container_of(core_vdev, struct vfio_pci_core_device, vdev); > + unsigned int index; > + > + index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); > + if (index == VFIO_PCI_BAR2_REGION_INDEX) { > + u64 req_len, pgoff, req_start; > + resource_size_t end = pci_resource_len(vdev->pdev, index) / 2; > + > + req_len = vma->vm_end - vma->vm_start; > + pgoff = vma->vm_pgoff & > + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); > + req_start = pgoff << PAGE_SHIFT; > + > + if (req_start + req_len > end) > + return -EINVAL; > + } > + > + return vfio_pci_core_mmap(core_vdev, vma); > +} > + > +static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev, > + const char __user *buf, size_t count, > + loff_t *ppos) > +{ > + size_t new_count = count; > + int ret; > + > + ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count); > + if (ret) > + return ret; > + > + return vfio_pci_core_write(core_vdev, buf, new_count, ppos); > +} > + > +static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev, > + char __user *buf, size_t count, > + loff_t *ppos) > +{ > + size_t new_count = count; > + int ret; > + > + ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count); > + if (ret) > + return ret; > + > + return vfio_pci_core_read(core_vdev, buf, new_count, ppos); > +} > + > +static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, > + unsigned long arg) > +{ > + if (cmd == VFIO_DEVICE_GET_REGION_INFO) { > + struct vfio_pci_core_device *vdev = > + container_of(core_vdev, struct vfio_pci_core_device, vdev); > + struct pci_dev *pdev = vdev->pdev; > + struct vfio_region_info info; > + unsigned long minsz; > + > + minsz = offsetofend(struct vfio_region_info, offset); > + > + if (copy_from_user(&info, (void __user *)arg, minsz)) > + return -EFAULT; > + > + if (info.argsz < minsz) > + return -EINVAL; > + > + if (info.index == VFIO_PCI_BAR2_REGION_INDEX) { > + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); > + > + /* > + * ACC VF dev BAR2 region consists of both functional > + * register space and migration control register space. > + * Report only the functional region to Guest. > + */ > + info.size = pci_resource_len(pdev, info.index) / 2; > + > + info.flags = VFIO_REGION_INFO_FLAG_READ | > + VFIO_REGION_INFO_FLAG_WRITE | > + VFIO_REGION_INFO_FLAG_MMAP; > + > + return copy_to_user((void __user *)arg, &info, minsz) ? > + -EFAULT : 0; > + } > + } > + return vfio_pci_core_ioctl(core_vdev, cmd, arg); > +} > + > static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) > { > struct vfio_pci_core_device *vdev = > @@ -28,6 +141,19 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) > return 0; > } > > +static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { > + .name = "hisi-acc-vfio-pci-migration", > + .open_device = hisi_acc_vfio_pci_open_device, > + .close_device = vfio_pci_core_close_device, > + .ioctl = hisi_acc_vfio_pci_ioctl, > + .device_feature = vfio_pci_core_ioctl_feature, > + .read = hisi_acc_vfio_pci_read, > + .write = hisi_acc_vfio_pci_write, > + .mmap = hisi_acc_vfio_pci_mmap, > + .request = vfio_pci_core_request, > + .match = vfio_pci_core_match, > +}; > + > static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { > .name = "hisi-acc-vfio-pci", > .open_device = hisi_acc_vfio_pci_open_device, >
Hi, Shameer, > From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > Sent: Friday, March 4, 2022 7:01 AM > > HiSilicon ACC VF device BAR2 region consists of both functional > register space and migration control register space. From a > security point of view, it's not advisable to export the migration > control region to Guest. > > Hence, introduce a separate struct vfio_device_ops for migration > support which will override the ioctl/read/write/mmap methods to > hide the migration region and limit the access only to the > functional register space. > > This will be used in subsequent patches when we add migration > support to the driver. As a security concern the migration control region should be always disabled regardless of whether migration support is added to the driver for such device... It sounds like we should first fix this security hole for acc device assignment and then add the migration support atop (at least organize the series in this way). Thanks Kevin
Hi Kevin, > -----Original Message----- > From: Tian, Kevin [mailto:kevin.tian@intel.com] > Sent: 08 March 2022 06:23 > To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; > kvm@vger.kernel.org; linux-kernel@vger.kernel.org; > linux-crypto@vger.kernel.org > Cc: linux-pci@vger.kernel.org; alex.williamson@redhat.com; jgg@nvidia.com; > cohuck@redhat.com; mgurtovoy@nvidia.com; yishaih@nvidia.com; Linuxarm > <linuxarm@huawei.com>; liulongfang <liulongfang@huawei.com>; Zengtao (B) > <prime.zeng@hisilicon.com>; Jonathan Cameron > <jonathan.cameron@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com> > Subject: RE: [PATCH v8 5/9] hisi_acc_vfio_pci: Restrict access to VF dev BAR2 > migration region > > Hi, Shameer, > > > From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > > Sent: Friday, March 4, 2022 7:01 AM > > > > HiSilicon ACC VF device BAR2 region consists of both functional > > register space and migration control register space. From a > > security point of view, it's not advisable to export the migration > > control region to Guest. > > > > Hence, introduce a separate struct vfio_device_ops for migration > > support which will override the ioctl/read/write/mmap methods to > > hide the migration region and limit the access only to the > > functional register space. > > > > This will be used in subsequent patches when we add migration > > support to the driver. > > As a security concern the migration control region should be always > disabled regardless of whether migration support is added to the > driver for such device... It sounds like we should first fix this security > hole for acc device assignment and then add the migration support > atop (at least organize the series in this way). By exposing the migration BAR region, there is a possibility that a malicious Guest can prevent migration from happening by manipulating the migration BAR region. I don't think there are any other security concerns now especially since we only support the STOP_COPY state. And the approach has been that we only restrict this if migration support is enabled. I think I can change the above "security concern" description to "malicious Guest can prevent migration" to make it more clear. Hope this is fine. Thanks, Shameer
> From: Shameerali Kolothum Thodi > <shameerali.kolothum.thodi@huawei.com> > Sent: Tuesday, March 8, 2022 4:33 PM > > Hi Kevin, > > > -----Original Message----- > > From: Tian, Kevin [mailto:kevin.tian@intel.com] > > Sent: 08 March 2022 06:23 > > To: Shameerali Kolothum Thodi > <shameerali.kolothum.thodi@huawei.com>; > > kvm@vger.kernel.org; linux-kernel@vger.kernel.org; > > linux-crypto@vger.kernel.org > > Cc: linux-pci@vger.kernel.org; alex.williamson@redhat.com; > jgg@nvidia.com; > > cohuck@redhat.com; mgurtovoy@nvidia.com; yishaih@nvidia.com; > Linuxarm > > <linuxarm@huawei.com>; liulongfang <liulongfang@huawei.com>; > Zengtao (B) > > <prime.zeng@hisilicon.com>; Jonathan Cameron > > <jonathan.cameron@huawei.com>; Wangzhou (B) > <wangzhou1@hisilicon.com> > > Subject: RE: [PATCH v8 5/9] hisi_acc_vfio_pci: Restrict access to VF dev > BAR2 > > migration region > > > > Hi, Shameer, > > > > > From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > > > Sent: Friday, March 4, 2022 7:01 AM > > > > > > HiSilicon ACC VF device BAR2 region consists of both functional > > > register space and migration control register space. From a > > > security point of view, it's not advisable to export the migration > > > control region to Guest. > > > > > > Hence, introduce a separate struct vfio_device_ops for migration > > > support which will override the ioctl/read/write/mmap methods to > > > hide the migration region and limit the access only to the > > > functional register space. > > > > > > This will be used in subsequent patches when we add migration > > > support to the driver. > > > > As a security concern the migration control region should be always > > disabled regardless of whether migration support is added to the > > driver for such device... It sounds like we should first fix this security > > hole for acc device assignment and then add the migration support > > atop (at least organize the series in this way). > > By exposing the migration BAR region, there is a possibility that a malicious > Guest can prevent migration from happening by manipulating the migration > BAR region. I don't think there are any other security concerns now especially > since we only support the STOP_COPY state. And the approach has been > that > we only restrict this if migration support is enabled. I think I can change the > above "security concern" description to "malicious Guest can prevent > migration" > to make it more clear. > In concept migrated device state may include both the state directly touched by the guest driver and also the one that is configured by the PF driver. Unless there is guarantee that the state managed via the migration control interface only touches the former (which implies the latter managed via the PF driver) this security concern will hold even for normal device assignment. If the acc device has such guarantee it's worth of a clarification here. Thanks Kevin
> -----Original Message----- > From: Tian, Kevin [mailto:kevin.tian@intel.com] > Sent: 08 March 2022 10:09 > To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>; > kvm@vger.kernel.org; linux-kernel@vger.kernel.org; > linux-crypto@vger.kernel.org > Cc: linux-pci@vger.kernel.org; alex.williamson@redhat.com; jgg@nvidia.com; > cohuck@redhat.com; mgurtovoy@nvidia.com; yishaih@nvidia.com; Linuxarm > <linuxarm@huawei.com>; liulongfang <liulongfang@huawei.com>; Zengtao (B) > <prime.zeng@hisilicon.com>; Jonathan Cameron > <jonathan.cameron@huawei.com>; Wangzhou (B) <wangzhou1@hisilicon.com> > Subject: RE: [PATCH v8 5/9] hisi_acc_vfio_pci: Restrict access to VF dev BAR2 > migration region > > > From: Shameerali Kolothum Thodi > > <shameerali.kolothum.thodi@huawei.com> > > Sent: Tuesday, March 8, 2022 4:33 PM > > > > Hi Kevin, > > > > > -----Original Message----- > > > From: Tian, Kevin [mailto:kevin.tian@intel.com] > > > Sent: 08 March 2022 06:23 > > > To: Shameerali Kolothum Thodi > > <shameerali.kolothum.thodi@huawei.com>; > > > kvm@vger.kernel.org; linux-kernel@vger.kernel.org; > > > linux-crypto@vger.kernel.org > > > Cc: linux-pci@vger.kernel.org; alex.williamson@redhat.com; > > jgg@nvidia.com; > > > cohuck@redhat.com; mgurtovoy@nvidia.com; yishaih@nvidia.com; > > Linuxarm > > > <linuxarm@huawei.com>; liulongfang <liulongfang@huawei.com>; > > Zengtao (B) > > > <prime.zeng@hisilicon.com>; Jonathan Cameron > > > <jonathan.cameron@huawei.com>; Wangzhou (B) > > <wangzhou1@hisilicon.com> > > > Subject: RE: [PATCH v8 5/9] hisi_acc_vfio_pci: Restrict access to VF dev > > BAR2 > > > migration region > > > > > > Hi, Shameer, > > > > > > > From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > > > > Sent: Friday, March 4, 2022 7:01 AM > > > > > > > > HiSilicon ACC VF device BAR2 region consists of both functional > > > > register space and migration control register space. From a > > > > security point of view, it's not advisable to export the migration > > > > control region to Guest. > > > > > > > > Hence, introduce a separate struct vfio_device_ops for migration > > > > support which will override the ioctl/read/write/mmap methods to > > > > hide the migration region and limit the access only to the > > > > functional register space. > > > > > > > > This will be used in subsequent patches when we add migration > > > > support to the driver. > > > > > > As a security concern the migration control region should be always > > > disabled regardless of whether migration support is added to the > > > driver for such device... It sounds like we should first fix this security > > > hole for acc device assignment and then add the migration support > > > atop (at least organize the series in this way). > > > > By exposing the migration BAR region, there is a possibility that a malicious > > Guest can prevent migration from happening by manipulating the migration > > BAR region. I don't think there are any other security concerns now > especially > > since we only support the STOP_COPY state. And the approach has been > > that > > we only restrict this if migration support is enabled. I think I can change the > > above "security concern" description to "malicious Guest can prevent > > migration" > > to make it more clear. > > > > In concept migrated device state may include both the state directly > touched by the guest driver and also the one that is configured by > the PF driver. Unless there is guarantee that the state managed via > the migration control interface only touches the former (which implies > the latter managed via the PF driver) this security concern will hold > even for normal device assignment. > > If the acc device has such guarantee it's worth of a clarification here. I just double-checked with our ACC team and the VF migration region manipulations will not affect the PF configurations. I will add a clarification here to make it clear. Thanks, Shameer
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 8129c3457b3b..582ee4fa4109 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -13,6 +13,119 @@ #include <linux/vfio.h> #include <linux/vfio_pci_core.h> +static int hisi_acc_pci_rw_access_check(struct vfio_device *core_vdev, + size_t count, loff_t *ppos, + size_t *new_count) +{ + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + + if (index == VFIO_PCI_BAR2_REGION_INDEX) { + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + resource_size_t end = pci_resource_len(vdev->pdev, index) / 2; + + /* Check if access is for migration control region */ + if (pos >= end) + return -EINVAL; + + *new_count = min(count, (size_t)(end - pos)); + } + + return 0; +} + +static int hisi_acc_vfio_pci_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma) +{ + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + unsigned int index; + + index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (index == VFIO_PCI_BAR2_REGION_INDEX) { + u64 req_len, pgoff, req_start; + resource_size_t end = pci_resource_len(vdev->pdev, index) / 2; + + req_len = vma->vm_end - vma->vm_start; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (req_start + req_len > end) + return -EINVAL; + } + + return vfio_pci_core_mmap(core_vdev, vma); +} + +static ssize_t hisi_acc_vfio_pci_write(struct vfio_device *core_vdev, + const char __user *buf, size_t count, + loff_t *ppos) +{ + size_t new_count = count; + int ret; + + ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count); + if (ret) + return ret; + + return vfio_pci_core_write(core_vdev, buf, new_count, ppos); +} + +static ssize_t hisi_acc_vfio_pci_read(struct vfio_device *core_vdev, + char __user *buf, size_t count, + loff_t *ppos) +{ + size_t new_count = count; + int ret; + + ret = hisi_acc_pci_rw_access_check(core_vdev, count, ppos, &new_count); + if (ret) + return ret; + + return vfio_pci_core_read(core_vdev, buf, new_count, ppos); +} + +static long hisi_acc_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg) +{ + if (cmd == VFIO_DEVICE_GET_REGION_INFO) { + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + struct pci_dev *pdev = vdev->pdev; + struct vfio_region_info info; + unsigned long minsz; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index == VFIO_PCI_BAR2_REGION_INDEX) { + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + + /* + * ACC VF dev BAR2 region consists of both functional + * register space and migration control register space. + * Report only the functional region to Guest. + */ + info.size = pci_resource_len(pdev, info.index) / 2; + + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP; + + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; + } + } + return vfio_pci_core_ioctl(core_vdev, cmd, arg); +} + static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) { struct vfio_pci_core_device *vdev = @@ -28,6 +141,19 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) return 0; } +static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { + .name = "hisi-acc-vfio-pci-migration", + .open_device = hisi_acc_vfio_pci_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = hisi_acc_vfio_pci_ioctl, + .device_feature = vfio_pci_core_ioctl_feature, + .read = hisi_acc_vfio_pci_read, + .write = hisi_acc_vfio_pci_write, + .mmap = hisi_acc_vfio_pci_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, +}; + static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .name = "hisi-acc-vfio-pci", .open_device = hisi_acc_vfio_pci_open_device,
HiSilicon ACC VF device BAR2 region consists of both functional register space and migration control register space. From a security point of view, it's not advisable to export the migration control region to Guest. Hence, introduce a separate struct vfio_device_ops for migration support which will override the ioctl/read/write/mmap methods to hide the migration region and limit the access only to the functional register space. This will be used in subsequent patches when we add migration support to the driver. Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> --- .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+)