diff mbox series

[RFC,v3,05/11] vdpa: shared virtual addressing support

Message ID 20210119045920.447-6-xieyongji@bytedance.com (mailing list archive)
State New, archived
Headers show
Series Introduce VDUSE - vDPA Device in Userspace | expand

Commit Message

Yongji Xie Jan. 19, 2021, 4:59 a.m. UTC
This patches introduces SVA (Shared Virtual Addressing)
support for vDPA device. During vDPA device allocation,
vDPA device driver needs to indicate whether SVA is
supported by the device. Then vhost-vdpa bus driver
will not pin user page and transfer userspace virtual
address instead of physical address during DMA mapping.

Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 drivers/vdpa/ifcvf/ifcvf_main.c   |  2 +-
 drivers/vdpa/mlx5/net/mlx5_vnet.c |  2 +-
 drivers/vdpa/vdpa.c               |  5 ++++-
 drivers/vdpa/vdpa_sim/vdpa_sim.c  |  3 ++-
 drivers/vhost/vdpa.c              | 35 +++++++++++++++++++++++------------
 include/linux/vdpa.h              | 10 +++++++---
 6 files changed, 38 insertions(+), 19 deletions(-)

Comments

Jason Wang Jan. 20, 2021, 5:55 a.m. UTC | #1
On 2021/1/19 下午12:59, Xie Yongji wrote:
> This patches introduces SVA (Shared Virtual Addressing)
> support for vDPA device. During vDPA device allocation,
> vDPA device driver needs to indicate whether SVA is
> supported by the device. Then vhost-vdpa bus driver
> will not pin user page and transfer userspace virtual
> address instead of physical address during DMA mapping.
>
> Suggested-by: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
>   drivers/vdpa/ifcvf/ifcvf_main.c   |  2 +-
>   drivers/vdpa/mlx5/net/mlx5_vnet.c |  2 +-
>   drivers/vdpa/vdpa.c               |  5 ++++-
>   drivers/vdpa/vdpa_sim/vdpa_sim.c  |  3 ++-
>   drivers/vhost/vdpa.c              | 35 +++++++++++++++++++++++------------
>   include/linux/vdpa.h              | 10 +++++++---
>   6 files changed, 38 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
> index 23474af7da40..95c4601f82f5 100644
> --- a/drivers/vdpa/ifcvf/ifcvf_main.c
> +++ b/drivers/vdpa/ifcvf/ifcvf_main.c
> @@ -439,7 +439,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>   
>   	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
>   				    dev, &ifc_vdpa_ops,
> -				    IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
> +				    IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
>   	if (adapter == NULL) {
>   		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
>   		return -ENOMEM;
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 77595c81488d..05988d6907f2 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -1959,7 +1959,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
>   	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>   
>   	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> -				 2 * mlx5_vdpa_max_qps(max_vqs), NULL);
> +				 2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
>   	if (IS_ERR(ndev))
>   		return PTR_ERR(ndev);
>   
> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> index 32bd48baffab..50cab930b2e5 100644
> --- a/drivers/vdpa/vdpa.c
> +++ b/drivers/vdpa/vdpa.c
> @@ -72,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
>    * @nvqs: number of virtqueues supported by this device
>    * @size: size of the parent structure that contains private data
>    * @name: name of the vdpa device; optional.
> + * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
>    *
>    * Driver should use vdpa_alloc_device() wrapper macro instead of
>    * using this directly.
> @@ -81,7 +82,8 @@ static void vdpa_release_dev(struct device *d)
>    */
>   struct vdpa_device *__vdpa_alloc_device(struct device *parent,
>   					const struct vdpa_config_ops *config,
> -					int nvqs, size_t size, const char *name)
> +					int nvqs, size_t size, const char *name,
> +					bool sva)
>   {
>   	struct vdpa_device *vdev;
>   	int err = -EINVAL;
> @@ -108,6 +110,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
>   	vdev->config = config;
>   	vdev->features_valid = false;
>   	vdev->nvqs = nvqs;
> +	vdev->sva = sva;
>   
>   	if (name)
>   		err = dev_set_name(&vdev->dev, "%s", name);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 85776e4e6749..03c796873a6b 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -367,7 +367,8 @@ static struct vdpasim *vdpasim_create(const char *name)
>   	else
>   		ops = &vdpasim_net_config_ops;
>   
> -	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM, name);
> +	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
> +				VDPASIM_VQ_NUM, name, false);
>   	if (!vdpasim)
>   		goto err_alloc;
>   
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 4a241d380c40..36b6950ba37f 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -486,21 +486,25 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>   static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
>   {
>   	struct vhost_dev *dev = &v->vdev;
> +	struct vdpa_device *vdpa = v->vdpa;
>   	struct vhost_iotlb *iotlb = dev->iotlb;
>   	struct vhost_iotlb_map *map;
>   	struct page *page;
>   	unsigned long pfn, pinned;
>   
>   	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
> -		pinned = map->size >> PAGE_SHIFT;
> -		for (pfn = map->addr >> PAGE_SHIFT;
> -		     pinned > 0; pfn++, pinned--) {
> -			page = pfn_to_page(pfn);
> -			if (map->perm & VHOST_ACCESS_WO)
> -				set_page_dirty_lock(page);
> -			unpin_user_page(page);
> +		if (!vdpa->sva) {
> +			pinned = map->size >> PAGE_SHIFT;
> +			for (pfn = map->addr >> PAGE_SHIFT;
> +			     pinned > 0; pfn++, pinned--) {
> +				page = pfn_to_page(pfn);
> +				if (map->perm & VHOST_ACCESS_WO)
> +					set_page_dirty_lock(page);
> +				unpin_user_page(page);
> +			}
> +			atomic64_sub(map->size >> PAGE_SHIFT,
> +					&dev->mm->pinned_vm);
>   		}
> -		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
>   		vhost_iotlb_map_free(iotlb, map);
>   	}
>   }
> @@ -558,13 +562,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
>   		r = iommu_map(v->domain, iova, pa, size,
>   			      perm_to_iommu_flags(perm));
>   	}
> -
> -	if (r)
> +	if (r) {
>   		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
> -	else
> +		return r;
> +	}
> +
> +	if (!vdpa->sva)
>   		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
>   
> -	return r;
> +	return 0;
>   }
>   
>   static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
> @@ -589,6 +595,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>   					   struct vhost_iotlb_msg *msg)
>   {
>   	struct vhost_dev *dev = &v->vdev;
> +	struct vdpa_device *vdpa = v->vdpa;
>   	struct vhost_iotlb *iotlb = dev->iotlb;
>   	struct page **page_list;
>   	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
> @@ -607,6 +614,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>   				    msg->iova + msg->size - 1))
>   		return -EEXIST;
>   
> +	if (vdpa->sva)
> +		return vhost_vdpa_map(v, msg->iova, msg->size,
> +				      msg->uaddr, msg->perm);
> +
>   	/* Limit the use of memory for bookkeeping */
>   	page_list = (struct page **) __get_free_page(GFP_KERNEL);
>   	if (!page_list)
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index cb5a3d847af3..f86869651614 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -44,6 +44,7 @@ struct vdpa_parent_dev;
>    * @config: the configuration ops for this device.
>    * @index: device index
>    * @features_valid: were features initialized? for legacy guests
> + * @sva: indicate whether SVA (Shared Virtual Addressing) is supported


Rethink about this. I think we probably need a better name other than 
"sva" since kernel already use that for shared virtual address space. 
But actually we don't the whole virtual address space.

And I guess this can not work for the device that use platform IOMMU, so 
we should check and fail if sva && !(dma_map || set_map).

Thanks


>    * @nvqs: maximum number of supported virtqueues
>    * @pdev: parent device pointer; caller must setup when registering device as part
>    *	  of dev_add() parentdev ops callback before invoking _vdpa_register_device().
> @@ -54,6 +55,7 @@ struct vdpa_device {
>   	const struct vdpa_config_ops *config;
>   	unsigned int index;
>   	bool features_valid;
> +	bool sva;
>   	int nvqs;
>   	struct vdpa_parent_dev *pdev;
>   };
> @@ -250,14 +252,16 @@ struct vdpa_config_ops {
>   
>   struct vdpa_device *__vdpa_alloc_device(struct device *parent,
>   					const struct vdpa_config_ops *config,
> -					int nvqs, size_t size, const char *name);
> +					int nvqs, size_t size,
> +					const char *name, bool sva);
>   
> -#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs, name)   \
> +#define vdpa_alloc_device(dev_struct, member, parent, config, \
> +			  nvqs, name, sva) \
>   			  container_of(__vdpa_alloc_device( \
>   				       parent, config, nvqs, \
>   				       sizeof(dev_struct) + \
>   				       BUILD_BUG_ON_ZERO(offsetof( \
> -				       dev_struct, member)), name), \
> +				       dev_struct, member)), name, sva), \
>   				       dev_struct, member)
>   
>   int vdpa_register_device(struct vdpa_device *vdev);
Yongji Xie Jan. 20, 2021, 7:10 a.m. UTC | #2
On Wed, Jan 20, 2021 at 1:55 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2021/1/19 下午12:59, Xie Yongji wrote:
> > This patches introduces SVA (Shared Virtual Addressing)
> > support for vDPA device. During vDPA device allocation,
> > vDPA device driver needs to indicate whether SVA is
> > supported by the device. Then vhost-vdpa bus driver
> > will not pin user page and transfer userspace virtual
> > address instead of physical address during DMA mapping.
> >
> > Suggested-by: Jason Wang <jasowang@redhat.com>
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> >   drivers/vdpa/ifcvf/ifcvf_main.c   |  2 +-
> >   drivers/vdpa/mlx5/net/mlx5_vnet.c |  2 +-
> >   drivers/vdpa/vdpa.c               |  5 ++++-
> >   drivers/vdpa/vdpa_sim/vdpa_sim.c  |  3 ++-
> >   drivers/vhost/vdpa.c              | 35 +++++++++++++++++++++++------------
> >   include/linux/vdpa.h              | 10 +++++++---
> >   6 files changed, 38 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
> > index 23474af7da40..95c4601f82f5 100644
> > --- a/drivers/vdpa/ifcvf/ifcvf_main.c
> > +++ b/drivers/vdpa/ifcvf/ifcvf_main.c
> > @@ -439,7 +439,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >
> >       adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
> >                                   dev, &ifc_vdpa_ops,
> > -                                 IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
> > +                                 IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
> >       if (adapter == NULL) {
> >               IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
> >               return -ENOMEM;
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index 77595c81488d..05988d6907f2 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -1959,7 +1959,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
> >       max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> >
> >       ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> > -                              2 * mlx5_vdpa_max_qps(max_vqs), NULL);
> > +                              2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
> >       if (IS_ERR(ndev))
> >               return PTR_ERR(ndev);
> >
> > diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> > index 32bd48baffab..50cab930b2e5 100644
> > --- a/drivers/vdpa/vdpa.c
> > +++ b/drivers/vdpa/vdpa.c
> > @@ -72,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
> >    * @nvqs: number of virtqueues supported by this device
> >    * @size: size of the parent structure that contains private data
> >    * @name: name of the vdpa device; optional.
> > + * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
> >    *
> >    * Driver should use vdpa_alloc_device() wrapper macro instead of
> >    * using this directly.
> > @@ -81,7 +82,8 @@ static void vdpa_release_dev(struct device *d)
> >    */
> >   struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> >                                       const struct vdpa_config_ops *config,
> > -                                     int nvqs, size_t size, const char *name)
> > +                                     int nvqs, size_t size, const char *name,
> > +                                     bool sva)
> >   {
> >       struct vdpa_device *vdev;
> >       int err = -EINVAL;
> > @@ -108,6 +110,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> >       vdev->config = config;
> >       vdev->features_valid = false;
> >       vdev->nvqs = nvqs;
> > +     vdev->sva = sva;
> >
> >       if (name)
> >               err = dev_set_name(&vdev->dev, "%s", name);
> > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > index 85776e4e6749..03c796873a6b 100644
> > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > @@ -367,7 +367,8 @@ static struct vdpasim *vdpasim_create(const char *name)
> >       else
> >               ops = &vdpasim_net_config_ops;
> >
> > -     vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM, name);
> > +     vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
> > +                             VDPASIM_VQ_NUM, name, false);
> >       if (!vdpasim)
> >               goto err_alloc;
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index 4a241d380c40..36b6950ba37f 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -486,21 +486,25 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> >   static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> >   {
> >       struct vhost_dev *dev = &v->vdev;
> > +     struct vdpa_device *vdpa = v->vdpa;
> >       struct vhost_iotlb *iotlb = dev->iotlb;
> >       struct vhost_iotlb_map *map;
> >       struct page *page;
> >       unsigned long pfn, pinned;
> >
> >       while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
> > -             pinned = map->size >> PAGE_SHIFT;
> > -             for (pfn = map->addr >> PAGE_SHIFT;
> > -                  pinned > 0; pfn++, pinned--) {
> > -                     page = pfn_to_page(pfn);
> > -                     if (map->perm & VHOST_ACCESS_WO)
> > -                             set_page_dirty_lock(page);
> > -                     unpin_user_page(page);
> > +             if (!vdpa->sva) {
> > +                     pinned = map->size >> PAGE_SHIFT;
> > +                     for (pfn = map->addr >> PAGE_SHIFT;
> > +                          pinned > 0; pfn++, pinned--) {
> > +                             page = pfn_to_page(pfn);
> > +                             if (map->perm & VHOST_ACCESS_WO)
> > +                                     set_page_dirty_lock(page);
> > +                             unpin_user_page(page);
> > +                     }
> > +                     atomic64_sub(map->size >> PAGE_SHIFT,
> > +                                     &dev->mm->pinned_vm);
> >               }
> > -             atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
> >               vhost_iotlb_map_free(iotlb, map);
> >       }
> >   }
> > @@ -558,13 +562,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> >               r = iommu_map(v->domain, iova, pa, size,
> >                             perm_to_iommu_flags(perm));
> >       }
> > -
> > -     if (r)
> > +     if (r) {
> >               vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
> > -     else
> > +             return r;
> > +     }
> > +
> > +     if (!vdpa->sva)
> >               atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
> >
> > -     return r;
> > +     return 0;
> >   }
> >
> >   static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
> > @@ -589,6 +595,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> >                                          struct vhost_iotlb_msg *msg)
> >   {
> >       struct vhost_dev *dev = &v->vdev;
> > +     struct vdpa_device *vdpa = v->vdpa;
> >       struct vhost_iotlb *iotlb = dev->iotlb;
> >       struct page **page_list;
> >       unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
> > @@ -607,6 +614,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> >                                   msg->iova + msg->size - 1))
> >               return -EEXIST;
> >
> > +     if (vdpa->sva)
> > +             return vhost_vdpa_map(v, msg->iova, msg->size,
> > +                                   msg->uaddr, msg->perm);
> > +
> >       /* Limit the use of memory for bookkeeping */
> >       page_list = (struct page **) __get_free_page(GFP_KERNEL);
> >       if (!page_list)
> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> > index cb5a3d847af3..f86869651614 100644
> > --- a/include/linux/vdpa.h
> > +++ b/include/linux/vdpa.h
> > @@ -44,6 +44,7 @@ struct vdpa_parent_dev;
> >    * @config: the configuration ops for this device.
> >    * @index: device index
> >    * @features_valid: were features initialized? for legacy guests
> > + * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
>
>
> Rethink about this. I think we probably need a better name other than
> "sva" since kernel already use that for shared virtual address space.
> But actually we don't the whole virtual address space.
>

This flag is used to tell vhost-vdpa bus driver to transfer virtual
addresses instead of physical addresses. So how about "use_va“,
”need_va" or "va“?

> And I guess this can not work for the device that use platform IOMMU, so
> we should check and fail if sva && !(dma_map || set_map).
>

Agree.

Thanks,
Yongji
Jason Wang Jan. 27, 2021, 3:43 a.m. UTC | #3
On 2021/1/20 下午3:10, Yongji Xie wrote:
> On Wed, Jan 20, 2021 at 1:55 PM Jason Wang <jasowang@redhat.com> wrote:
>>
>> On 2021/1/19 下午12:59, Xie Yongji wrote:
>>> This patches introduces SVA (Shared Virtual Addressing)
>>> support for vDPA device. During vDPA device allocation,
>>> vDPA device driver needs to indicate whether SVA is
>>> supported by the device. Then vhost-vdpa bus driver
>>> will not pin user page and transfer userspace virtual
>>> address instead of physical address during DMA mapping.
>>>
>>> Suggested-by: Jason Wang <jasowang@redhat.com>
>>> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
>>> ---
>>>    drivers/vdpa/ifcvf/ifcvf_main.c   |  2 +-
>>>    drivers/vdpa/mlx5/net/mlx5_vnet.c |  2 +-
>>>    drivers/vdpa/vdpa.c               |  5 ++++-
>>>    drivers/vdpa/vdpa_sim/vdpa_sim.c  |  3 ++-
>>>    drivers/vhost/vdpa.c              | 35 +++++++++++++++++++++++------------
>>>    include/linux/vdpa.h              | 10 +++++++---
>>>    6 files changed, 38 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
>>> index 23474af7da40..95c4601f82f5 100644
>>> --- a/drivers/vdpa/ifcvf/ifcvf_main.c
>>> +++ b/drivers/vdpa/ifcvf/ifcvf_main.c
>>> @@ -439,7 +439,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>
>>>        adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
>>>                                    dev, &ifc_vdpa_ops,
>>> -                                 IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
>>> +                                 IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
>>>        if (adapter == NULL) {
>>>                IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
>>>                return -ENOMEM;
>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> index 77595c81488d..05988d6907f2 100644
>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>>> @@ -1959,7 +1959,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
>>>        max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
>>>
>>>        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
>>> -                              2 * mlx5_vdpa_max_qps(max_vqs), NULL);
>>> +                              2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
>>>        if (IS_ERR(ndev))
>>>                return PTR_ERR(ndev);
>>>
>>> diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
>>> index 32bd48baffab..50cab930b2e5 100644
>>> --- a/drivers/vdpa/vdpa.c
>>> +++ b/drivers/vdpa/vdpa.c
>>> @@ -72,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
>>>     * @nvqs: number of virtqueues supported by this device
>>>     * @size: size of the parent structure that contains private data
>>>     * @name: name of the vdpa device; optional.
>>> + * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
>>>     *
>>>     * Driver should use vdpa_alloc_device() wrapper macro instead of
>>>     * using this directly.
>>> @@ -81,7 +82,8 @@ static void vdpa_release_dev(struct device *d)
>>>     */
>>>    struct vdpa_device *__vdpa_alloc_device(struct device *parent,
>>>                                        const struct vdpa_config_ops *config,
>>> -                                     int nvqs, size_t size, const char *name)
>>> +                                     int nvqs, size_t size, const char *name,
>>> +                                     bool sva)
>>>    {
>>>        struct vdpa_device *vdev;
>>>        int err = -EINVAL;
>>> @@ -108,6 +110,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
>>>        vdev->config = config;
>>>        vdev->features_valid = false;
>>>        vdev->nvqs = nvqs;
>>> +     vdev->sva = sva;
>>>
>>>        if (name)
>>>                err = dev_set_name(&vdev->dev, "%s", name);
>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>> index 85776e4e6749..03c796873a6b 100644
>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>> @@ -367,7 +367,8 @@ static struct vdpasim *vdpasim_create(const char *name)
>>>        else
>>>                ops = &vdpasim_net_config_ops;
>>>
>>> -     vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM, name);
>>> +     vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
>>> +                             VDPASIM_VQ_NUM, name, false);
>>>        if (!vdpasim)
>>>                goto err_alloc;
>>>
>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>> index 4a241d380c40..36b6950ba37f 100644
>>> --- a/drivers/vhost/vdpa.c
>>> +++ b/drivers/vhost/vdpa.c
>>> @@ -486,21 +486,25 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>>>    static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
>>>    {
>>>        struct vhost_dev *dev = &v->vdev;
>>> +     struct vdpa_device *vdpa = v->vdpa;
>>>        struct vhost_iotlb *iotlb = dev->iotlb;
>>>        struct vhost_iotlb_map *map;
>>>        struct page *page;
>>>        unsigned long pfn, pinned;
>>>
>>>        while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
>>> -             pinned = map->size >> PAGE_SHIFT;
>>> -             for (pfn = map->addr >> PAGE_SHIFT;
>>> -                  pinned > 0; pfn++, pinned--) {
>>> -                     page = pfn_to_page(pfn);
>>> -                     if (map->perm & VHOST_ACCESS_WO)
>>> -                             set_page_dirty_lock(page);
>>> -                     unpin_user_page(page);
>>> +             if (!vdpa->sva) {
>>> +                     pinned = map->size >> PAGE_SHIFT;
>>> +                     for (pfn = map->addr >> PAGE_SHIFT;
>>> +                          pinned > 0; pfn++, pinned--) {
>>> +                             page = pfn_to_page(pfn);
>>> +                             if (map->perm & VHOST_ACCESS_WO)
>>> +                                     set_page_dirty_lock(page);
>>> +                             unpin_user_page(page);
>>> +                     }
>>> +                     atomic64_sub(map->size >> PAGE_SHIFT,
>>> +                                     &dev->mm->pinned_vm);
>>>                }
>>> -             atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
>>>                vhost_iotlb_map_free(iotlb, map);
>>>        }
>>>    }
>>> @@ -558,13 +562,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
>>>                r = iommu_map(v->domain, iova, pa, size,
>>>                              perm_to_iommu_flags(perm));
>>>        }
>>> -
>>> -     if (r)
>>> +     if (r) {
>>>                vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
>>> -     else
>>> +             return r;
>>> +     }
>>> +
>>> +     if (!vdpa->sva)
>>>                atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
>>>
>>> -     return r;
>>> +     return 0;
>>>    }
>>>
>>>    static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
>>> @@ -589,6 +595,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>>                                           struct vhost_iotlb_msg *msg)
>>>    {
>>>        struct vhost_dev *dev = &v->vdev;
>>> +     struct vdpa_device *vdpa = v->vdpa;
>>>        struct vhost_iotlb *iotlb = dev->iotlb;
>>>        struct page **page_list;
>>>        unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
>>> @@ -607,6 +614,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>>                                    msg->iova + msg->size - 1))
>>>                return -EEXIST;
>>>
>>> +     if (vdpa->sva)
>>> +             return vhost_vdpa_map(v, msg->iova, msg->size,
>>> +                                   msg->uaddr, msg->perm);
>>> +
>>>        /* Limit the use of memory for bookkeeping */
>>>        page_list = (struct page **) __get_free_page(GFP_KERNEL);
>>>        if (!page_list)
>>> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>>> index cb5a3d847af3..f86869651614 100644
>>> --- a/include/linux/vdpa.h
>>> +++ b/include/linux/vdpa.h
>>> @@ -44,6 +44,7 @@ struct vdpa_parent_dev;
>>>     * @config: the configuration ops for this device.
>>>     * @index: device index
>>>     * @features_valid: were features initialized? for legacy guests
>>> + * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
>>
>> Rethink about this. I think we probably need a better name other than
>> "sva" since kernel already use that for shared virtual address space.
>> But actually we don't the whole virtual address space.
>>
> This flag is used to tell vhost-vdpa bus driver to transfer virtual
> addresses instead of physical addresses. So how about "use_va“,
> ”need_va" or "va“?


I think "use_va" or "need_va" should be fine.

Thanks


>
>> And I guess this can not work for the device that use platform IOMMU, so
>> we should check and fail if sva && !(dma_map || set_map).
>>
> Agree.
>
> Thanks,
> Yongji
>
diff mbox series

Patch

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 23474af7da40..95c4601f82f5 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -439,7 +439,7 @@  static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
 				    dev, &ifc_vdpa_ops,
-				    IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
+				    IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
 	if (adapter == NULL) {
 		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
 		return -ENOMEM;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 77595c81488d..05988d6907f2 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1959,7 +1959,7 @@  static int mlx5v_probe(struct auxiliary_device *adev,
 	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
 
 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-				 2 * mlx5_vdpa_max_qps(max_vqs), NULL);
+				 2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
 	if (IS_ERR(ndev))
 		return PTR_ERR(ndev);
 
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 32bd48baffab..50cab930b2e5 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -72,6 +72,7 @@  static void vdpa_release_dev(struct device *d)
  * @nvqs: number of virtqueues supported by this device
  * @size: size of the parent structure that contains private data
  * @name: name of the vdpa device; optional.
+ * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
  *
  * Driver should use vdpa_alloc_device() wrapper macro instead of
  * using this directly.
@@ -81,7 +82,8 @@  static void vdpa_release_dev(struct device *d)
  */
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 					const struct vdpa_config_ops *config,
-					int nvqs, size_t size, const char *name)
+					int nvqs, size_t size, const char *name,
+					bool sva)
 {
 	struct vdpa_device *vdev;
 	int err = -EINVAL;
@@ -108,6 +110,7 @@  struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 	vdev->config = config;
 	vdev->features_valid = false;
 	vdev->nvqs = nvqs;
+	vdev->sva = sva;
 
 	if (name)
 		err = dev_set_name(&vdev->dev, "%s", name);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 85776e4e6749..03c796873a6b 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -367,7 +367,8 @@  static struct vdpasim *vdpasim_create(const char *name)
 	else
 		ops = &vdpasim_net_config_ops;
 
-	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM, name);
+	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
+				VDPASIM_VQ_NUM, name, false);
 	if (!vdpasim)
 		goto err_alloc;
 
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 4a241d380c40..36b6950ba37f 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -486,21 +486,25 @@  static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 {
 	struct vhost_dev *dev = &v->vdev;
+	struct vdpa_device *vdpa = v->vdpa;
 	struct vhost_iotlb *iotlb = dev->iotlb;
 	struct vhost_iotlb_map *map;
 	struct page *page;
 	unsigned long pfn, pinned;
 
 	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
-		pinned = map->size >> PAGE_SHIFT;
-		for (pfn = map->addr >> PAGE_SHIFT;
-		     pinned > 0; pfn++, pinned--) {
-			page = pfn_to_page(pfn);
-			if (map->perm & VHOST_ACCESS_WO)
-				set_page_dirty_lock(page);
-			unpin_user_page(page);
+		if (!vdpa->sva) {
+			pinned = map->size >> PAGE_SHIFT;
+			for (pfn = map->addr >> PAGE_SHIFT;
+			     pinned > 0; pfn++, pinned--) {
+				page = pfn_to_page(pfn);
+				if (map->perm & VHOST_ACCESS_WO)
+					set_page_dirty_lock(page);
+				unpin_user_page(page);
+			}
+			atomic64_sub(map->size >> PAGE_SHIFT,
+					&dev->mm->pinned_vm);
 		}
-		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 		vhost_iotlb_map_free(iotlb, map);
 	}
 }
@@ -558,13 +562,15 @@  static int vhost_vdpa_map(struct vhost_vdpa *v,
 		r = iommu_map(v->domain, iova, pa, size,
 			      perm_to_iommu_flags(perm));
 	}
-
-	if (r)
+	if (r) {
 		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
-	else
+		return r;
+	}
+
+	if (!vdpa->sva)
 		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 
-	return r;
+	return 0;
 }
 
 static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
@@ -589,6 +595,7 @@  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 					   struct vhost_iotlb_msg *msg)
 {
 	struct vhost_dev *dev = &v->vdev;
+	struct vdpa_device *vdpa = v->vdpa;
 	struct vhost_iotlb *iotlb = dev->iotlb;
 	struct page **page_list;
 	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
@@ -607,6 +614,10 @@  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 				    msg->iova + msg->size - 1))
 		return -EEXIST;
 
+	if (vdpa->sva)
+		return vhost_vdpa_map(v, msg->iova, msg->size,
+				      msg->uaddr, msg->perm);
+
 	/* Limit the use of memory for bookkeeping */
 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 	if (!page_list)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index cb5a3d847af3..f86869651614 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -44,6 +44,7 @@  struct vdpa_parent_dev;
  * @config: the configuration ops for this device.
  * @index: device index
  * @features_valid: were features initialized? for legacy guests
+ * @sva: indicate whether SVA (Shared Virtual Addressing) is supported
  * @nvqs: maximum number of supported virtqueues
  * @pdev: parent device pointer; caller must setup when registering device as part
  *	  of dev_add() parentdev ops callback before invoking _vdpa_register_device().
@@ -54,6 +55,7 @@  struct vdpa_device {
 	const struct vdpa_config_ops *config;
 	unsigned int index;
 	bool features_valid;
+	bool sva;
 	int nvqs;
 	struct vdpa_parent_dev *pdev;
 };
@@ -250,14 +252,16 @@  struct vdpa_config_ops {
 
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 					const struct vdpa_config_ops *config,
-					int nvqs, size_t size, const char *name);
+					int nvqs, size_t size,
+					const char *name, bool sva);
 
-#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs, name)   \
+#define vdpa_alloc_device(dev_struct, member, parent, config, \
+			  nvqs, name, sva) \
 			  container_of(__vdpa_alloc_device( \
 				       parent, config, nvqs, \
 				       sizeof(dev_struct) + \
 				       BUILD_BUG_ON_ZERO(offsetof( \
-				       dev_struct, member)), name), \
+				       dev_struct, member)), name, sva), \
 				       dev_struct, member)
 
 int vdpa_register_device(struct vdpa_device *vdev);