Message ID | 20210315053721.189-8-xieyongji@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce VDUSE - vDPA Device in Userspace | expand |
在 2021/3/15 下午1:37, Xie Yongji 写道: > This patch introduces an attribute for vDPA device to indicate > whether virtual address can be used. If vDPA device driver set > it, vhost-vdpa bus driver will not pin user page and transfer > userspace virtual address instead of physical address during > DMA mapping. And corresponding vma->vm_file and offset will be > also passed as an opaque pointer. > > Suggested-by: Jason Wang <jasowang@redhat.com> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- > drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- > drivers/vdpa/vdpa.c | 9 +++- > drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- > drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- > drivers/vhost/vdpa.c | 104 +++++++++++++++++++++++++++++++------- > include/linux/vdpa.h | 19 +++++-- > 7 files changed, 113 insertions(+), 27 deletions(-) > > diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c > index d555a6a5d1ba..aee013f3eb5f 100644 > --- a/drivers/vdpa/ifcvf/ifcvf_main.c > +++ b/drivers/vdpa/ifcvf/ifcvf_main.c > @@ -431,7 +431,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) > } > > adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, > - dev, &ifc_vdpa_ops, NULL); > + dev, &ifc_vdpa_ops, NULL, false); > if (adapter == NULL) { > IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); > return -ENOMEM; > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c > index 71397fdafa6a..fb62ebcf464a 100644 > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c > @@ -1982,7 +1982,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, > max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); > > ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, > - NULL); > + NULL, false); > if (IS_ERR(ndev)) > return PTR_ERR(ndev); > > diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c > index 5cffce67cab0..97fbac276c72 100644 > --- a/drivers/vdpa/vdpa.c > +++ b/drivers/vdpa/vdpa.c > @@ -71,6 +71,7 @@ static void vdpa_release_dev(struct device *d) > * @config: the bus operations that is supported by this device > * @size: size of the parent structure that contains private data > * @name: name of the vdpa device; optional. > + * @use_va: indicate whether virtual address must be used by this device > * > * Driver should use vdpa_alloc_device() wrapper macro instead of > * using this directly. > @@ -80,7 +81,8 @@ static void vdpa_release_dev(struct device *d) > */ > struct vdpa_device *__vdpa_alloc_device(struct device *parent, > const struct vdpa_config_ops *config, > - size_t size, const char *name) > + size_t size, const char *name, > + bool use_va) > { > struct vdpa_device *vdev; > int err = -EINVAL; > @@ -91,6 +93,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, > if (!!config->dma_map != !!config->dma_unmap) > goto err; > > + /* It should only work for the device that use on-chip IOMMU */ > + if (use_va && !(config->dma_map || config->set_map)) > + goto err; > + > err = -ENOMEM; > vdev = kzalloc(size, GFP_KERNEL); > if (!vdev) > @@ -106,6 +112,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, > vdev->index = err; > vdev->config = config; > vdev->features_valid = false; > + vdev->use_va = use_va; > > if (name) > err = dev_set_name(&vdev->dev, "%s", name); > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > index ff331f088baf..d26334e9a412 100644 > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > @@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) > ops = &vdpasim_config_ops; > > vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, > - dev_attr->name); > + dev_attr->name, false); > if (!vdpasim) > goto err_alloc; > > diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c > index 1321a2fcd088..03b36aed48d6 100644 > --- a/drivers/vdpa/virtio_pci/vp_vdpa.c > +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c > @@ -377,7 +377,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) > return ret; > > vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, > - dev, &vp_vdpa_ops, NULL); > + dev, &vp_vdpa_ops, NULL, false); > if (vp_vdpa == NULL) { > dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); > return -ENOMEM; > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > index 7c83fbf3edac..b65c21ae98d1 100644 > --- a/drivers/vhost/vdpa.c > +++ b/drivers/vhost/vdpa.c > @@ -480,21 +480,30 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, > static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > { > struct vhost_dev *dev = &v->vdev; > + struct vdpa_device *vdpa = v->vdpa; > struct vhost_iotlb *iotlb = dev->iotlb; > struct vhost_iotlb_map *map; > + struct vdpa_map_file *map_file; > struct page *page; > unsigned long pfn, pinned; > > while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { > - pinned = map->size >> PAGE_SHIFT; > - for (pfn = map->addr >> PAGE_SHIFT; > - pinned > 0; pfn++, pinned--) { > - page = pfn_to_page(pfn); > - if (map->perm & VHOST_ACCESS_WO) > - set_page_dirty_lock(page); > - unpin_user_page(page); > + if (!vdpa->use_va) { > + pinned = map->size >> PAGE_SHIFT; > + for (pfn = map->addr >> PAGE_SHIFT; > + pinned > 0; pfn++, pinned--) { > + page = pfn_to_page(pfn); > + if (map->perm & VHOST_ACCESS_WO) > + set_page_dirty_lock(page); > + unpin_user_page(page); > + } > + atomic64_sub(map->size >> PAGE_SHIFT, > + &dev->mm->pinned_vm); > + } else { > + map_file = (struct vdpa_map_file *)map->opaque; > + fput(map_file->file); > + kfree(map_file); Let's factor out the logic of pa and va separatedly here. Other looks good to me. Thanks > } > - atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm); > vhost_iotlb_map_free(iotlb, map); > } > } > @@ -530,21 +539,21 @@ static int perm_to_iommu_flags(u32 perm) > return flags | IOMMU_CACHE; > } > > -static int vhost_vdpa_map(struct vhost_vdpa *v, > - u64 iova, u64 size, u64 pa, u32 perm) > +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, > + u64 size, u64 pa, u32 perm, void *opaque) > { > struct vhost_dev *dev = &v->vdev; > struct vdpa_device *vdpa = v->vdpa; > const struct vdpa_config_ops *ops = vdpa->config; > int r = 0; > > - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, > - pa, perm); > + r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, > + pa, perm, opaque); > if (r) > return r; > > if (ops->dma_map) { > - r = ops->dma_map(vdpa, iova, size, pa, perm, NULL); > + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); > } else if (ops->set_map) { > if (!v->in_batch) > r = ops->set_map(vdpa, dev->iotlb); > @@ -552,13 +561,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > r = iommu_map(v->domain, iova, pa, size, > perm_to_iommu_flags(perm)); > } > - > - if (r) > + if (r) { > vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); > - else > + return r; > + } > + > + if (!vdpa->use_va) > atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); > > - return r; > + return 0; > } > > static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) > @@ -579,6 +590,56 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) > } > } > > +static int vhost_vdpa_va_map(struct vhost_vdpa *v, > + u64 iova, u64 size, u64 uaddr, u32 perm) > +{ > + struct vhost_dev *dev = &v->vdev; > + u64 offset, map_size, map_iova = iova; > + struct vdpa_map_file *map_file; > + struct vm_area_struct *vma; > + int ret; > + > + mmap_read_lock(dev->mm); > + > + while (size) { > + vma = find_vma(dev->mm, uaddr); > + if (!vma) { > + ret = -EINVAL; > + break; > + } > + map_size = min(size, vma->vm_end - uaddr); > + if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && > + !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) > + goto next; > + > + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); > + if (!map_file) { > + ret = -ENOMEM; > + break; > + } > + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; > + map_file->offset = offset; > + map_file->file = get_file(vma->vm_file); > + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, > + perm, map_file); > + if (ret) { > + fput(map_file->file); > + kfree(map_file); > + break; > + } > +next: > + size -= map_size; > + uaddr += map_size; > + map_iova += map_size; > + } > + if (ret) > + vhost_vdpa_unmap(v, iova, map_iova - iova); > + > + mmap_read_unlock(dev->mm); > + > + return ret; > +} > + > static int vhost_vdpa_pa_map(struct vhost_vdpa *v, > u64 iova, u64 size, u64 uaddr, u32 perm) > { > @@ -645,7 +706,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, > csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; > ret = vhost_vdpa_map(v, iova, csize, > map_pfn << PAGE_SHIFT, > - perm); > + perm, NULL); > if (ret) { > /* > * Unpin the pages that are left unmapped > @@ -674,7 +735,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, > > /* Pin the rest chunk */ > ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, > - map_pfn << PAGE_SHIFT, perm); > + map_pfn << PAGE_SHIFT, perm, NULL); > out: > if (ret) { > if (nchunks) { > @@ -707,6 +768,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > struct vhost_iotlb_msg *msg) > { > struct vhost_dev *dev = &v->vdev; > + struct vdpa_device *vdpa = v->vdpa; > struct vhost_iotlb *iotlb = dev->iotlb; > > if (msg->iova < v->range.first || > @@ -717,6 +779,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, > msg->iova + msg->size - 1)) > return -EEXIST; > > + if (vdpa->use_va) > + return vhost_vdpa_va_map(v, msg->iova, msg->size, > + msg->uaddr, msg->perm); > + > return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, > msg->perm); > } > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h > index b01f7c9096bf..e67404e4b23e 100644 > --- a/include/linux/vdpa.h > +++ b/include/linux/vdpa.h > @@ -44,6 +44,7 @@ struct vdpa_mgmt_dev; > * @config: the configuration ops for this device. > * @index: device index > * @features_valid: were features initialized? for legacy guests > + * @use_va: indicate whether virtual address must be used by this device > * @nvqs: maximum number of supported virtqueues > * @mdev: management device pointer; caller must setup when registering device as part > * of dev_add() mgmtdev ops callback before invoking _vdpa_register_device(). > @@ -54,6 +55,7 @@ struct vdpa_device { > const struct vdpa_config_ops *config; > unsigned int index; > bool features_valid; > + bool use_va; > int nvqs; > struct vdpa_mgmt_dev *mdev; > }; > @@ -69,6 +71,16 @@ struct vdpa_iova_range { > }; > > /** > + * Corresponding file area for device memory mapping > + * @file: vma->vm_file for the mapping > + * @offset: mapping offset in the vm_file > + */ > +struct vdpa_map_file { > + struct file *file; > + u64 offset; > +}; > + > +/** > * vDPA_config_ops - operations for configuring a vDPA device. > * Note: vDPA device drivers are required to implement all of the > * operations unless it is mentioned to be optional in the following > @@ -250,14 +262,15 @@ struct vdpa_config_ops { > > struct vdpa_device *__vdpa_alloc_device(struct device *parent, > const struct vdpa_config_ops *config, > - size_t size, const char *name); > + size_t size, const char *name, > + bool use_va); > > -#define vdpa_alloc_device(dev_struct, member, parent, config, name) \ > +#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \ > container_of(__vdpa_alloc_device( \ > parent, config, \ > sizeof(dev_struct) + \ > BUILD_BUG_ON_ZERO(offsetof( \ > - dev_struct, member)), name), \ > + dev_struct, member)), name, use_va), \ > dev_struct, member) > > int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
On Tue, Mar 23, 2021 at 11:13 AM Jason Wang <jasowang@redhat.com> wrote: > > > 在 2021/3/15 下午1:37, Xie Yongji 写道: > > This patch introduces an attribute for vDPA device to indicate > > whether virtual address can be used. If vDPA device driver set > > it, vhost-vdpa bus driver will not pin user page and transfer > > userspace virtual address instead of physical address during > > DMA mapping. And corresponding vma->vm_file and offset will be > > also passed as an opaque pointer. > > > > Suggested-by: Jason Wang <jasowang@redhat.com> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > > --- > > drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- > > drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- > > drivers/vdpa/vdpa.c | 9 +++- > > drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- > > drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- > > drivers/vhost/vdpa.c | 104 +++++++++++++++++++++++++++++++------- > > include/linux/vdpa.h | 19 +++++-- > > 7 files changed, 113 insertions(+), 27 deletions(-) > > > > diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c > > index d555a6a5d1ba..aee013f3eb5f 100644 > > --- a/drivers/vdpa/ifcvf/ifcvf_main.c > > +++ b/drivers/vdpa/ifcvf/ifcvf_main.c > > @@ -431,7 +431,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > } > > > > adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, > > - dev, &ifc_vdpa_ops, NULL); > > + dev, &ifc_vdpa_ops, NULL, false); > > if (adapter == NULL) { > > IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); > > return -ENOMEM; > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c > > index 71397fdafa6a..fb62ebcf464a 100644 > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c > > @@ -1982,7 +1982,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, > > max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); > > > > ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, > > - NULL); > > + NULL, false); > > if (IS_ERR(ndev)) > > return PTR_ERR(ndev); > > > > diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c > > index 5cffce67cab0..97fbac276c72 100644 > > --- a/drivers/vdpa/vdpa.c > > +++ b/drivers/vdpa/vdpa.c > > @@ -71,6 +71,7 @@ static void vdpa_release_dev(struct device *d) > > * @config: the bus operations that is supported by this device > > * @size: size of the parent structure that contains private data > > * @name: name of the vdpa device; optional. > > + * @use_va: indicate whether virtual address must be used by this device > > * > > * Driver should use vdpa_alloc_device() wrapper macro instead of > > * using this directly. > > @@ -80,7 +81,8 @@ static void vdpa_release_dev(struct device *d) > > */ > > struct vdpa_device *__vdpa_alloc_device(struct device *parent, > > const struct vdpa_config_ops *config, > > - size_t size, const char *name) > > + size_t size, const char *name, > > + bool use_va) > > { > > struct vdpa_device *vdev; > > int err = -EINVAL; > > @@ -91,6 +93,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, > > if (!!config->dma_map != !!config->dma_unmap) > > goto err; > > > > + /* It should only work for the device that use on-chip IOMMU */ > > + if (use_va && !(config->dma_map || config->set_map)) > > + goto err; > > + > > err = -ENOMEM; > > vdev = kzalloc(size, GFP_KERNEL); > > if (!vdev) > > @@ -106,6 +112,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, > > vdev->index = err; > > vdev->config = config; > > vdev->features_valid = false; > > + vdev->use_va = use_va; > > > > if (name) > > err = dev_set_name(&vdev->dev, "%s", name); > > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > > index ff331f088baf..d26334e9a412 100644 > > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > > @@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) > > ops = &vdpasim_config_ops; > > > > vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, > > - dev_attr->name); > > + dev_attr->name, false); > > if (!vdpasim) > > goto err_alloc; > > > > diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c > > index 1321a2fcd088..03b36aed48d6 100644 > > --- a/drivers/vdpa/virtio_pci/vp_vdpa.c > > +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c > > @@ -377,7 +377,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > return ret; > > > > vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, > > - dev, &vp_vdpa_ops, NULL); > > + dev, &vp_vdpa_ops, NULL, false); > > if (vp_vdpa == NULL) { > > dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); > > return -ENOMEM; > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > > index 7c83fbf3edac..b65c21ae98d1 100644 > > --- a/drivers/vhost/vdpa.c > > +++ b/drivers/vhost/vdpa.c > > @@ -480,21 +480,30 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, > > static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > > { > > struct vhost_dev *dev = &v->vdev; > > + struct vdpa_device *vdpa = v->vdpa; > > struct vhost_iotlb *iotlb = dev->iotlb; > > struct vhost_iotlb_map *map; > > + struct vdpa_map_file *map_file; > > struct page *page; > > unsigned long pfn, pinned; > > > > while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { > > - pinned = map->size >> PAGE_SHIFT; > > - for (pfn = map->addr >> PAGE_SHIFT; > > - pinned > 0; pfn++, pinned--) { > > - page = pfn_to_page(pfn); > > - if (map->perm & VHOST_ACCESS_WO) > > - set_page_dirty_lock(page); > > - unpin_user_page(page); > > + if (!vdpa->use_va) { > > + pinned = map->size >> PAGE_SHIFT; > > + for (pfn = map->addr >> PAGE_SHIFT; > > + pinned > 0; pfn++, pinned--) { > > + page = pfn_to_page(pfn); > > + if (map->perm & VHOST_ACCESS_WO) > > + set_page_dirty_lock(page); > > + unpin_user_page(page); > > + } > > + atomic64_sub(map->size >> PAGE_SHIFT, > > + &dev->mm->pinned_vm); > > + } else { > > + map_file = (struct vdpa_map_file *)map->opaque; > > + fput(map_file->file); > > + kfree(map_file); > > > Let's factor out the logic of pa and va separatedly here. > Will do it. Thanks, Yongji
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index d555a6a5d1ba..aee013f3eb5f 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -431,7 +431,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) } adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, - dev, &ifc_vdpa_ops, NULL); + dev, &ifc_vdpa_ops, NULL, false); if (adapter == NULL) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); return -ENOMEM; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 71397fdafa6a..fb62ebcf464a 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1982,7 +1982,7 @@ static int mlx5v_probe(struct auxiliary_device *adev, max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, - NULL); + NULL, false); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 5cffce67cab0..97fbac276c72 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -71,6 +71,7 @@ static void vdpa_release_dev(struct device *d) * @config: the bus operations that is supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional. + * @use_va: indicate whether virtual address must be used by this device * * Driver should use vdpa_alloc_device() wrapper macro instead of * using this directly. @@ -80,7 +81,8 @@ static void vdpa_release_dev(struct device *d) */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - size_t size, const char *name) + size_t size, const char *name, + bool use_va) { struct vdpa_device *vdev; int err = -EINVAL; @@ -91,6 +93,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (!!config->dma_map != !!config->dma_unmap) goto err; + /* It should only work for the device that use on-chip IOMMU */ + if (use_va && !(config->dma_map || config->set_map)) + goto err; + err = -ENOMEM; vdev = kzalloc(size, GFP_KERNEL); if (!vdev) @@ -106,6 +112,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, vdev->index = err; vdev->config = config; vdev->features_valid = false; + vdev->use_va = use_va; if (name) err = dev_set_name(&vdev->dev, "%s", name); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index ff331f088baf..d26334e9a412 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) ops = &vdpasim_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, - dev_attr->name); + dev_attr->name, false); if (!vdpasim) goto err_alloc; diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 1321a2fcd088..03b36aed48d6 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -377,7 +377,7 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, - dev, &vp_vdpa_ops, NULL); + dev, &vp_vdpa_ops, NULL, false); if (vp_vdpa == NULL) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); return -ENOMEM; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 7c83fbf3edac..b65c21ae98d1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -480,21 +480,30 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) { struct vhost_dev *dev = &v->vdev; + struct vdpa_device *vdpa = v->vdpa; struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb_map *map; + struct vdpa_map_file *map_file; struct page *page; unsigned long pfn, pinned; while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) { - pinned = map->size >> PAGE_SHIFT; - for (pfn = map->addr >> PAGE_SHIFT; - pinned > 0; pfn++, pinned--) { - page = pfn_to_page(pfn); - if (map->perm & VHOST_ACCESS_WO) - set_page_dirty_lock(page); - unpin_user_page(page); + if (!vdpa->use_va) { + pinned = map->size >> PAGE_SHIFT; + for (pfn = map->addr >> PAGE_SHIFT; + pinned > 0; pfn++, pinned--) { + page = pfn_to_page(pfn); + if (map->perm & VHOST_ACCESS_WO) + set_page_dirty_lock(page); + unpin_user_page(page); + } + atomic64_sub(map->size >> PAGE_SHIFT, + &dev->mm->pinned_vm); + } else { + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); } - atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm); vhost_iotlb_map_free(iotlb, map); } } @@ -530,21 +539,21 @@ static int perm_to_iommu_flags(u32 perm) return flags | IOMMU_CACHE; } -static int vhost_vdpa_map(struct vhost_vdpa *v, - u64 iova, u64 size, u64 pa, u32 perm) +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, + u64 size, u64 pa, u32 perm, void *opaque) { struct vhost_dev *dev = &v->vdev; struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; int r = 0; - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, - pa, perm); + r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1, + pa, perm, opaque); if (r) return r; if (ops->dma_map) { - r = ops->dma_map(vdpa, iova, size, pa, perm, NULL); + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); } else if (ops->set_map) { if (!v->in_batch) r = ops->set_map(vdpa, dev->iotlb); @@ -552,13 +561,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); } - - if (r) + if (r) { vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); - else + return r; + } + + if (!vdpa->use_va) atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm); - return r; + return 0; } static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) @@ -579,6 +590,56 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) } } +static int vhost_vdpa_va_map(struct vhost_vdpa *v, + u64 iova, u64 size, u64 uaddr, u32 perm) +{ + struct vhost_dev *dev = &v->vdev; + u64 offset, map_size, map_iova = iova; + struct vdpa_map_file *map_file; + struct vm_area_struct *vma; + int ret; + + mmap_read_lock(dev->mm); + + while (size) { + vma = find_vma(dev->mm, uaddr); + if (!vma) { + ret = -EINVAL; + break; + } + map_size = min(size, vma->vm_end - uaddr); + if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) && + !(vma->vm_flags & (VM_IO | VM_PFNMAP)))) + goto next; + + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL); + if (!map_file) { + ret = -ENOMEM; + break; + } + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; + map_file->offset = offset; + map_file->file = get_file(vma->vm_file); + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr, + perm, map_file); + if (ret) { + fput(map_file->file); + kfree(map_file); + break; + } +next: + size -= map_size; + uaddr += map_size; + map_iova += map_size; + } + if (ret) + vhost_vdpa_unmap(v, iova, map_iova - iova); + + mmap_read_unlock(dev->mm); + + return ret; +} + static int vhost_vdpa_pa_map(struct vhost_vdpa *v, u64 iova, u64 size, u64 uaddr, u32 perm) { @@ -645,7 +706,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; ret = vhost_vdpa_map(v, iova, csize, map_pfn << PAGE_SHIFT, - perm); + perm, NULL); if (ret) { /* * Unpin the pages that are left unmapped @@ -674,7 +735,7 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v, /* Pin the rest chunk */ ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, perm); + map_pfn << PAGE_SHIFT, perm, NULL); out: if (ret) { if (nchunks) { @@ -707,6 +768,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_iotlb_msg *msg) { struct vhost_dev *dev = &v->vdev; + struct vdpa_device *vdpa = v->vdpa; struct vhost_iotlb *iotlb = dev->iotlb; if (msg->iova < v->range.first || @@ -717,6 +779,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, msg->iova + msg->size - 1)) return -EEXIST; + if (vdpa->use_va) + return vhost_vdpa_va_map(v, msg->iova, msg->size, + msg->uaddr, msg->perm); + return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr, msg->perm); } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index b01f7c9096bf..e67404e4b23e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -44,6 +44,7 @@ struct vdpa_mgmt_dev; * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @use_va: indicate whether virtual address must be used by this device * @nvqs: maximum number of supported virtqueues * @mdev: management device pointer; caller must setup when registering device as part * of dev_add() mgmtdev ops callback before invoking _vdpa_register_device(). @@ -54,6 +55,7 @@ struct vdpa_device { const struct vdpa_config_ops *config; unsigned int index; bool features_valid; + bool use_va; int nvqs; struct vdpa_mgmt_dev *mdev; }; @@ -69,6 +71,16 @@ struct vdpa_iova_range { }; /** + * Corresponding file area for device memory mapping + * @file: vma->vm_file for the mapping + * @offset: mapping offset in the vm_file + */ +struct vdpa_map_file { + struct file *file; + u64 offset; +}; + +/** * vDPA_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the * operations unless it is mentioned to be optional in the following @@ -250,14 +262,15 @@ struct vdpa_config_ops { struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, - size_t size, const char *name); + size_t size, const char *name, + bool use_va); -#define vdpa_alloc_device(dev_struct, member, parent, config, name) \ +#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \ container_of(__vdpa_alloc_device( \ parent, config, \ sizeof(dev_struct) + \ BUILD_BUG_ON_ZERO(offsetof( \ - dev_struct, member)), name), \ + dev_struct, member)), name, use_va), \ dev_struct, member) int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
This patch introduces an attribute for vDPA device to indicate whether virtual address can be used. If vDPA device driver set it, vhost-vdpa bus driver will not pin user page and transfer userspace virtual address instead of physical address during DMA mapping. And corresponding vma->vm_file and offset will be also passed as an opaque pointer. Suggested-by: Jason Wang <jasowang@redhat.com> Signed-off-by: Xie Yongji <xieyongji@bytedance.com> --- drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- drivers/vdpa/vdpa.c | 9 +++- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- drivers/vhost/vdpa.c | 104 +++++++++++++++++++++++++++++++------- include/linux/vdpa.h | 19 +++++-- 7 files changed, 113 insertions(+), 27 deletions(-)