[RFC,6/6] vdpa_sim: add support for user VA

Message ID	20221214163025.103075-7-sgarzare@redhat.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <kvm-owner@kernel.org> From: Stefano Garzarella <sgarzare@redhat.com> To: virtualization@lists.linux-foundation.org Cc: Jason Wang <jasowang@redhat.com>, Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>, linux-kernel@vger.kernel.org, kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>, eperezma@redhat.com, stefanha@redhat.com, netdev@vger.kernel.org, Stefano Garzarella <sgarzare@redhat.com> Subject: [RFC PATCH 6/6] vdpa_sim: add support for user VA Date: Wed, 14 Dec 2022 17:30:25 +0100 Message-Id: <20221214163025.103075-7-sgarzare@redhat.com> In-Reply-To: <20221214163025.103075-1-sgarzare@redhat.com> References: <20221214163025.103075-1-sgarzare@redhat.com> MIME-Version: 1.0 Content-type: text/plain Content-Transfer-Encoding: 8bit Precedence: bulk
Series	vdpa_sim: add support for user VA \| expand [RFC,0/6] vdpa_sim: add support for user VA [RFC,1/6] vdpa: add bind_mm callback [RFC,2/6] vhost-vdpa: use bind_mm device callback [RFC,3/6] vringh: support VA with iotlb [RFC,4/6] vdpa_sim: make devices agnostic for work management [RFC,5/6] vdpa_sim: use kthread worker [RFC,6/6] vdpa_sim: add support for user VA

Message ID

20221214163025.103075-7-sgarzare@redhat.com (mailing list archive)

State

New, archived

Headers

From: Stefano Garzarella <sgarzare@redhat.com>
To: virtualization@lists.linux-foundation.org
Cc: Jason Wang <jasowang@redhat.com>,
        Andrey Zhadchenko <andrey.zhadchenko@virtuozzo.com>,
        linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
        "Michael S. Tsirkin" <mst@redhat.com>, eperezma@redhat.com,
        stefanha@redhat.com, netdev@vger.kernel.org,
        Stefano Garzarella <sgarzare@redhat.com>
Subject: [RFC PATCH 6/6] vdpa_sim: add support for user VA
Date: Wed, 14 Dec 2022 17:30:25 +0100
Message-Id: <20221214163025.103075-7-sgarzare@redhat.com>
In-Reply-To: <20221214163025.103075-1-sgarzare@redhat.com>
References: <20221214163025.103075-1-sgarzare@redhat.com>
MIME-Version: 1.0
Content-type: text/plain
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

vdpa_sim: add support for user VA | expand

Commit Message

Stefano Garzarella Dec. 14, 2022, 4:30 p.m. UTC

The new "use_va" module parameter (default: false) is used in
vdpa_alloc_device() to inform the vDPA framework that the device
supports VA.

vringh is initialized to use VA only when "use_va" is true and the
user's mm has been bound. So, only when the bus supports user VA
(e.g. vhost-vdpa).

vdpasim_mm_work_fn work is used to attach the kthread to the user
address space when the .bind_mm callback is invoked, and to detach
it when the device is reset.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
 drivers/vdpa/vdpa_sim/vdpa_sim.h |   1 +
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 2 deletions(-)

Comments

Jason Wang Dec. 16, 2022, 7:26 a.m. UTC | #1

On Thu, Dec 15, 2022 at 12:31 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> The new "use_va" module parameter (default: false) is used in
> vdpa_alloc_device() to inform the vDPA framework that the device
> supports VA.
>
> vringh is initialized to use VA only when "use_va" is true and the
> user's mm has been bound. So, only when the bus supports user VA
> (e.g. vhost-vdpa).
>
> vdpasim_mm_work_fn work is used to attach the kthread to the user
> address space when the .bind_mm callback is invoked, and to detach
> it when the device is reset.

One thing on my mind is that the current datapath runs under a
spinlock, which prevents us from using iov_iter (which may trigger page
faults).

We need to get rid of the spinlock first.

>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  drivers/vdpa/vdpa_sim/vdpa_sim.h |   1 +
>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++-
>  2 files changed, 103 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> index 07ef53ea375e..1b010e5c0445 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> @@ -55,6 +55,7 @@ struct vdpasim {
>         struct vdpasim_virtqueue *vqs;
>         struct kthread_worker *worker;
>         struct kthread_work work;
> +       struct mm_struct *mm_bound;
>         struct vdpasim_dev_attr dev_attr;
>         /* spinlock to synchronize virtqueue state */
>         spinlock_t lock;
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 36a1d2e0a6ba..6e07cedef30c 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444);
>  MODULE_PARM_DESC(max_iotlb_entries,
>                  "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>
> +static bool use_va;
> +module_param(use_va, bool, 0444);
> +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA");
> +
>  #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>  #define VDPASIM_QUEUE_MAX 256
>  #define VDPASIM_VENDOR_ID 0
>
> +struct vdpasim_mm_work {
> +       struct kthread_work work;
> +       struct task_struct *owner;
> +       struct mm_struct *mm;
> +       bool bind;
> +       int ret;
> +};
> +
> +static void vdpasim_mm_work_fn(struct kthread_work *work)
> +{
> +       struct vdpasim_mm_work *mm_work =
> +               container_of(work, struct vdpasim_mm_work, work);
> +
> +       mm_work->ret = 0;
> +
> +       if (mm_work->bind) {
> +               kthread_use_mm(mm_work->mm);
> +#if 0
> +               if (mm_work->owner)
> +                       mm_work->ret = cgroup_attach_task_all(mm_work->owner,
> +                                                             current);
> +#endif
> +       } else {
> +#if 0
> +               //TODO: check it
> +               cgroup_release(current);
> +#endif
> +               kthread_unuse_mm(mm_work->mm);
> +       }
> +}
> +
> +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
> +                                   struct vdpasim_mm_work *mm_work)
> +{
> +       struct kthread_work *work = &mm_work->work;
> +
> +       kthread_init_work(work, vdpasim_mm_work_fn);
> +       kthread_queue_work(vdpasim->worker, work);
> +
> +       spin_unlock(&vdpasim->lock);
> +       kthread_flush_work(work);
> +       spin_lock(&vdpasim->lock);
> +}
> +
> +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
> +                                 struct mm_struct *new_mm,
> +                                 struct task_struct *owner)
> +{
> +       struct vdpasim_mm_work mm_work;
> +
> +       mm_work.owner = owner;
> +       mm_work.mm = new_mm;
> +       mm_work.bind = true;
> +
> +       vdpasim_worker_queue_mm(vdpasim, &mm_work);
> +

Should we wait for the work to be finished?

> +       if (!mm_work.ret)
> +               vdpasim->mm_bound = new_mm;
> +
> +       return mm_work.ret;
> +}
> +
> +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
> +{
> +       struct vdpasim_mm_work mm_work;
> +
> +       if (!vdpasim->mm_bound)
> +               return;
> +
> +       mm_work.mm = vdpasim->mm_bound;
> +       mm_work.bind = false;
> +
> +       vdpasim_worker_queue_mm(vdpasim, &mm_work);
> +
> +       vdpasim->mm_bound = NULL;
> +}
>  static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>  {
>         return container_of(vdpa, struct vdpasim, vdpa);
> @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring)
>  static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>  {
>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
> +       bool va_enabled = use_va && vdpasim->mm_bound;
>
> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
> +                         va_enabled,
>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>                           (struct vring_avail *)
>                           (uintptr_t)vq->driver_addr,
> @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
>  {
>         int i;
>
> +       //TODO: should we cancel the works?
> +       vdpasim_worker_unbind_mm(vdpasim);

We probably don't need this since it's the virtio level reset so we
need to keep the mm bound in this case. Otherwise we may break the
guest. It should be the responsibility of the driver to call
config_ops->unbind if it needs to do that.

Thanks


> +
>         spin_lock(&vdpasim->iommu_lock);
>
>         for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
> @@ -275,7 +360,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>
>         vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
>                                     dev_attr->ngroups, dev_attr->nas,
> -                                   dev_attr->name, false);
> +                                   dev_attr->name, use_va);
>         if (IS_ERR(vdpasim)) {
>                 ret = PTR_ERR(vdpasim);
>                 goto err_alloc;
> @@ -657,6 +742,19 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
>         return ret;
>  }
>
> +static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm,
> +                          struct task_struct *owner)
> +{
> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> +       int ret;
> +
> +       spin_lock(&vdpasim->lock);
> +       ret = vdpasim_worker_bind_mm(vdpasim, mm, owner);
> +       spin_unlock(&vdpasim->lock);
> +
> +       return ret;
> +}
> +
>  static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
>                            u64 iova, u64 size,
>                            u64 pa, u32 perm, void *opaque)
> @@ -744,6 +842,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
>         .set_group_asid         = vdpasim_set_group_asid,
>         .dma_map                = vdpasim_dma_map,
>         .dma_unmap              = vdpasim_dma_unmap,
> +       .bind_mm                = vdpasim_bind_mm,
>         .free                   = vdpasim_free,
>  };
>
> @@ -776,6 +875,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
>         .get_iova_range         = vdpasim_get_iova_range,
>         .set_group_asid         = vdpasim_set_group_asid,
>         .set_map                = vdpasim_set_map,
> +       .bind_mm                = vdpasim_bind_mm,
>         .free                   = vdpasim_free,
>  };
>
> --
> 2.38.1
>

Stefano Garzarella Dec. 16, 2022, 8:13 a.m. UTC | #2

On Fri, Dec 16, 2022 at 03:26:46PM +0800, Jason Wang wrote:
>On Thu, Dec 15, 2022 at 12:31 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> The new "use_va" module parameter (default: false) is used in
>> vdpa_alloc_device() to inform the vDPA framework that the device
>> supports VA.
>>
>> vringh is initialized to use VA only when "use_va" is true and the
>> user's mm has been bound. So, only when the bus supports user VA
>> (e.g. vhost-vdpa).
>>
>> vdpasim_mm_work_fn work is used to attach the kthread to the user
>> address space when the .bind_mm callback is invoked, and to detach
>> it when the device is reset.
>
>One thing in my mind is that the current datapath is running under
>spinlock which prevents us from using iov_iter (which may have page
>faults).
>
>We need to get rid of the spinlock first.

Right! I already have a patch for that, since I used it for the vdpa-blk
software device in-kernel PoC, where I had the same issue.

I'll add it to the series!

>
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>> ---
>>  drivers/vdpa/vdpa_sim/vdpa_sim.h |   1 +
>>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++-
>>  2 files changed, 103 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> index 07ef53ea375e..1b010e5c0445 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> @@ -55,6 +55,7 @@ struct vdpasim {
>>         struct vdpasim_virtqueue *vqs;
>>         struct kthread_worker *worker;
>>         struct kthread_work work;
>> +       struct mm_struct *mm_bound;
>>         struct vdpasim_dev_attr dev_attr;
>>         /* spinlock to synchronize virtqueue state */
>>         spinlock_t lock;
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index 36a1d2e0a6ba..6e07cedef30c 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444);
>>  MODULE_PARM_DESC(max_iotlb_entries,
>>                  "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>>
>> +static bool use_va;
>> +module_param(use_va, bool, 0444);
>> +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA");
>> +
>>  #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>>  #define VDPASIM_QUEUE_MAX 256
>>  #define VDPASIM_VENDOR_ID 0
>>
>> +struct vdpasim_mm_work {
>> +       struct kthread_work work;
>> +       struct task_struct *owner;
>> +       struct mm_struct *mm;
>> +       bool bind;
>> +       int ret;
>> +};
>> +
>> +static void vdpasim_mm_work_fn(struct kthread_work *work)
>> +{
>> +       struct vdpasim_mm_work *mm_work =
>> +               container_of(work, struct vdpasim_mm_work, work);
>> +
>> +       mm_work->ret = 0;
>> +
>> +       if (mm_work->bind) {
>> +               kthread_use_mm(mm_work->mm);
>> +#if 0
>> +               if (mm_work->owner)
>> +                       mm_work->ret = cgroup_attach_task_all(mm_work->owner,
>> +                                                             current);
>> +#endif
>> +       } else {
>> +#if 0
>> +               //TODO: check it
>> +               cgroup_release(current);
>> +#endif
>> +               kthread_unuse_mm(mm_work->mm);
>> +       }
>> +}
>> +
>> +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
>> +                                   struct vdpasim_mm_work *mm_work)
>> +{
>> +       struct kthread_work *work = &mm_work->work;
>> +
>> +       kthread_init_work(work, vdpasim_mm_work_fn);
>> +       kthread_queue_work(vdpasim->worker, work);
>> +
>> +       spin_unlock(&vdpasim->lock);
>> +       kthread_flush_work(work);
>> +       spin_lock(&vdpasim->lock);
>> +}
>> +
>> +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
>> +                                 struct mm_struct *new_mm,
>> +                                 struct task_struct *owner)
>> +{
>> +       struct vdpasim_mm_work mm_work;
>> +
>> +       mm_work.owner = owner;
>> +       mm_work.mm = new_mm;
>> +       mm_work.bind = true;
>> +
>> +       vdpasim_worker_queue_mm(vdpasim, &mm_work);
>> +
>
>Should we wait for the work to be finished?

Yep, I'm waiting inside vdpasim_worker_queue_mm() by calling
kthread_flush_work().

If we use a mutex, I think we can avoid releasing the lock around that
call.

>
>> +       if (!mm_work.ret)
>> +               vdpasim->mm_bound = new_mm;
>> +
>> +       return mm_work.ret;
>> +}
>> +
>> +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
>> +{
>> +       struct vdpasim_mm_work mm_work;
>> +
>> +       if (!vdpasim->mm_bound)
>> +               return;
>> +
>> +       mm_work.mm = vdpasim->mm_bound;
>> +       mm_work.bind = false;
>> +
>> +       vdpasim_worker_queue_mm(vdpasim, &mm_work);
>> +
>> +       vdpasim->mm_bound = NULL;
>> +}
>>  static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>>  {
>>         return container_of(vdpa, struct vdpasim, vdpa);
>> @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring)
>>  static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>>  {
>>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>> +       bool va_enabled = use_va && vdpasim->mm_bound;
>>
>> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
>> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
>> +                         va_enabled,
>>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>>                           (struct vring_avail *)
>>                           (uintptr_t)vq->driver_addr,
>> @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
>>  {
>>         int i;
>>
>> +       //TODO: should we cancel the works?
>> +       vdpasim_worker_unbind_mm(vdpasim);
>
>We probably don't need this since it's the virtio level reset so we
>need to keep the mm bound in this case. Otherwise we may break the
>guest. It should be the responsibility of the driver to call
>config_ops->unbind if it needs to do that.

Got it. My biggest concern was the case where we go from vhost-vdpa to
virtio-vdpa, but as you said, in vhost-vdpa I can call unbind before
releasing the device.

Thanks,
Stefano

Eugenio Perez Martin Dec. 21, 2022, 7:17 a.m. UTC | #3

On Wed, Dec 14, 2022 at 5:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> The new "use_va" module parameter (default: false) is used in

Why not true by default? I'd say it makes more sense for the simulator
to use va mode and only use pa for testing it.

> vdpa_alloc_device() to inform the vDPA framework that the device
> supports VA.
>
> vringh is initialized to use VA only when "use_va" is true and the
> user's mm has been bound. So, only when the bus supports user VA
> (e.g. vhost-vdpa).
>
> vdpasim_mm_work_fn work is used to attach the kthread to the user
> address space when the .bind_mm callback is invoked, and to detach
> it when the device is reset.
>
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
>  drivers/vdpa/vdpa_sim/vdpa_sim.h |   1 +
>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++-
>  2 files changed, 103 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> index 07ef53ea375e..1b010e5c0445 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> @@ -55,6 +55,7 @@ struct vdpasim {
>         struct vdpasim_virtqueue *vqs;
>         struct kthread_worker *worker;
>         struct kthread_work work;
> +       struct mm_struct *mm_bound;
>         struct vdpasim_dev_attr dev_attr;
>         /* spinlock to synchronize virtqueue state */
>         spinlock_t lock;
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 36a1d2e0a6ba..6e07cedef30c 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444);
>  MODULE_PARM_DESC(max_iotlb_entries,
>                  "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>
> +static bool use_va;
> +module_param(use_va, bool, 0444);
> +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA");
> +
>  #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>  #define VDPASIM_QUEUE_MAX 256
>  #define VDPASIM_VENDOR_ID 0
>
> +struct vdpasim_mm_work {
> +       struct kthread_work work;
> +       struct task_struct *owner;
> +       struct mm_struct *mm;
> +       bool bind;
> +       int ret;
> +};
> +
> +static void vdpasim_mm_work_fn(struct kthread_work *work)
> +{
> +       struct vdpasim_mm_work *mm_work =
> +               container_of(work, struct vdpasim_mm_work, work);
> +
> +       mm_work->ret = 0;
> +
> +       if (mm_work->bind) {
> +               kthread_use_mm(mm_work->mm);
> +#if 0
> +               if (mm_work->owner)
> +                       mm_work->ret = cgroup_attach_task_all(mm_work->owner,
> +                                                             current);
> +#endif
> +       } else {
> +#if 0
> +               //TODO: check it
> +               cgroup_release(current);
> +#endif
> +               kthread_unuse_mm(mm_work->mm);
> +       }
> +}
> +
> +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
> +                                   struct vdpasim_mm_work *mm_work)
> +{
> +       struct kthread_work *work = &mm_work->work;
> +
> +       kthread_init_work(work, vdpasim_mm_work_fn);
> +       kthread_queue_work(vdpasim->worker, work);
> +
> +       spin_unlock(&vdpasim->lock);
> +       kthread_flush_work(work);
> +       spin_lock(&vdpasim->lock);
> +}
> +
> +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
> +                                 struct mm_struct *new_mm,
> +                                 struct task_struct *owner)
> +{
> +       struct vdpasim_mm_work mm_work;
> +
> +       mm_work.owner = owner;
> +       mm_work.mm = new_mm;
> +       mm_work.bind = true;
> +
> +       vdpasim_worker_queue_mm(vdpasim, &mm_work);
> +
> +       if (!mm_work.ret)
> +               vdpasim->mm_bound = new_mm;
> +
> +       return mm_work.ret;
> +}
> +
> +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
> +{
> +       struct vdpasim_mm_work mm_work;
> +
> +       if (!vdpasim->mm_bound)
> +               return;
> +
> +       mm_work.mm = vdpasim->mm_bound;
> +       mm_work.bind = false;
> +
> +       vdpasim_worker_queue_mm(vdpasim, &mm_work);
> +
> +       vdpasim->mm_bound = NULL;
> +}
>  static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>  {
>         return container_of(vdpa, struct vdpasim, vdpa);
> @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring)
>  static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>  {
>         struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
> +       bool va_enabled = use_va && vdpasim->mm_bound;
>
> -       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
> +       vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
> +                         va_enabled,
>                           (struct vring_desc *)(uintptr_t)vq->desc_addr,
>                           (struct vring_avail *)
>                           (uintptr_t)vq->driver_addr,
> @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
>  {
>         int i;
>
> +       //TODO: should we cancel the works?
> +       vdpasim_worker_unbind_mm(vdpasim);
> +
>         spin_lock(&vdpasim->iommu_lock);
>
>         for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
> @@ -275,7 +360,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>
>         vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
>                                     dev_attr->ngroups, dev_attr->nas,
> -                                   dev_attr->name, false);
> +                                   dev_attr->name, use_va);
>         if (IS_ERR(vdpasim)) {
>                 ret = PTR_ERR(vdpasim);
>                 goto err_alloc;
> @@ -657,6 +742,19 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
>         return ret;
>  }
>
> +static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm,
> +                          struct task_struct *owner)
> +{
> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> +       int ret;
> +
> +       spin_lock(&vdpasim->lock);
> +       ret = vdpasim_worker_bind_mm(vdpasim, mm, owner);
> +       spin_unlock(&vdpasim->lock);
> +
> +       return ret;
> +}
> +
>  static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
>                            u64 iova, u64 size,
>                            u64 pa, u32 perm, void *opaque)
> @@ -744,6 +842,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
>         .set_group_asid         = vdpasim_set_group_asid,
>         .dma_map                = vdpasim_dma_map,
>         .dma_unmap              = vdpasim_dma_unmap,
> +       .bind_mm                = vdpasim_bind_mm,
>         .free                   = vdpasim_free,
>  };
>
> @@ -776,6 +875,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
>         .get_iova_range         = vdpasim_get_iova_range,
>         .set_group_asid         = vdpasim_set_group_asid,
>         .set_map                = vdpasim_set_map,
> +       .bind_mm                = vdpasim_bind_mm,
>         .free                   = vdpasim_free,
>  };
>
> --
> 2.38.1
>

Stefano Garzarella Dec. 21, 2022, 9:50 a.m. UTC | #4

On Wed, Dec 21, 2022 at 08:17:41AM +0100, Eugenio Perez Martin wrote:
>On Wed, Dec 14, 2022 at 5:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> The new "use_va" module parameter (default: false) is used in
>
>Why not true by default? I'd say it makes more sense for the simulator
>to use va mode and only use pa for testing it.

Yep, you are right. I'll change it in the next version.
I initially left it at false because we usually use the simulator to 
test the paths that would be used for the real hardware.

Thanks,
Stefano

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 07ef53ea375e..1b010e5c0445 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -55,6 +55,7 @@  struct vdpasim {
 	struct vdpasim_virtqueue *vqs;
 	struct kthread_worker *worker;
 	struct kthread_work work;
+	struct mm_struct *mm_bound;
 	struct vdpasim_dev_attr dev_attr;
 	/* spinlock to synchronize virtqueue state */
 	spinlock_t lock;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 36a1d2e0a6ba..6e07cedef30c 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -36,10 +36,90 @@  module_param(max_iotlb_entries, int, 0444);
 MODULE_PARM_DESC(max_iotlb_entries,
 		 "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
 
+static bool use_va;
+module_param(use_va, bool, 0444);
+MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA");
+
 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
 #define VDPASIM_QUEUE_MAX 256
 #define VDPASIM_VENDOR_ID 0
 
+struct vdpasim_mm_work {
+	struct kthread_work work;
+	struct task_struct *owner;
+	struct mm_struct *mm;
+	bool bind;
+	int ret;
+};
+
+static void vdpasim_mm_work_fn(struct kthread_work *work)
+{
+	struct vdpasim_mm_work *mm_work =
+		container_of(work, struct vdpasim_mm_work, work);
+
+	mm_work->ret = 0;
+
+	if (mm_work->bind) {
+		kthread_use_mm(mm_work->mm);
+#if 0
+		if (mm_work->owner)
+			mm_work->ret = cgroup_attach_task_all(mm_work->owner,
+							      current);
+#endif
+	} else {
+#if 0
+		//TODO: check it
+		cgroup_release(current);
+#endif
+		kthread_unuse_mm(mm_work->mm);
+	}
+}
+
+static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
+				    struct vdpasim_mm_work *mm_work)
+{
+	struct kthread_work *work = &mm_work->work;
+
+	kthread_init_work(work, vdpasim_mm_work_fn);
+	kthread_queue_work(vdpasim->worker, work);
+
+	spin_unlock(&vdpasim->lock);
+	kthread_flush_work(work);
+	spin_lock(&vdpasim->lock);
+}
+
+static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
+				  struct mm_struct *new_mm,
+				  struct task_struct *owner)
+{
+	struct vdpasim_mm_work mm_work;
+
+	mm_work.owner = owner;
+	mm_work.mm = new_mm;
+	mm_work.bind = true;
+
+	vdpasim_worker_queue_mm(vdpasim, &mm_work);
+
+	if (!mm_work.ret)
+		vdpasim->mm_bound = new_mm;
+
+	return mm_work.ret;
+}
+
+static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
+{
+	struct vdpasim_mm_work mm_work;
+
+	if (!vdpasim->mm_bound)
+		return;
+
+	mm_work.mm = vdpasim->mm_bound;
+	mm_work.bind = false;
+
+	vdpasim_worker_queue_mm(vdpasim, &mm_work);
+
+	vdpasim->mm_bound = NULL;
+}
 static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
 {
 	return container_of(vdpa, struct vdpasim, vdpa);
@@ -66,8 +146,10 @@  static void vdpasim_vq_notify(struct vringh *vring)
 static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
 {
 	struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
+	bool va_enabled = use_va && vdpasim->mm_bound;
 
-	vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
+	vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
+			  va_enabled,
 			  (struct vring_desc *)(uintptr_t)vq->desc_addr,
 			  (struct vring_avail *)
 			  (uintptr_t)vq->driver_addr,
@@ -96,6 +178,9 @@  static void vdpasim_do_reset(struct vdpasim *vdpasim)
 {
 	int i;
 
+	//TODO: should we cancel the works?
+	vdpasim_worker_unbind_mm(vdpasim);
+
 	spin_lock(&vdpasim->iommu_lock);
 
 	for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
@@ -275,7 +360,7 @@  struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
 
 	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
 				    dev_attr->ngroups, dev_attr->nas,
-				    dev_attr->name, false);
+				    dev_attr->name, use_va);
 	if (IS_ERR(vdpasim)) {
 		ret = PTR_ERR(vdpasim);
 		goto err_alloc;
@@ -657,6 +742,19 @@  static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
 	return ret;
 }
 
+static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm,
+			   struct task_struct *owner)
+{
+	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+	int ret;
+
+	spin_lock(&vdpasim->lock);
+	ret = vdpasim_worker_bind_mm(vdpasim, mm, owner);
+	spin_unlock(&vdpasim->lock);
+
+	return ret;
+}
+
 static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
 			   u64 iova, u64 size,
 			   u64 pa, u32 perm, void *opaque)
@@ -744,6 +842,7 @@  static const struct vdpa_config_ops vdpasim_config_ops = {
 	.set_group_asid         = vdpasim_set_group_asid,
 	.dma_map                = vdpasim_dma_map,
 	.dma_unmap              = vdpasim_dma_unmap,
+	.bind_mm		= vdpasim_bind_mm,
 	.free                   = vdpasim_free,
 };
 
@@ -776,6 +875,7 @@  static const struct vdpa_config_ops vdpasim_batch_config_ops = {
 	.get_iova_range         = vdpasim_get_iova_range,
 	.set_group_asid         = vdpasim_set_group_asid,
 	.set_map                = vdpasim_set_map,
+	.bind_mm		= vdpasim_bind_mm,
 	.free                   = vdpasim_free,
 };

[RFC,6/6] vdpa_sim: add support for user VA

Commit Message

Comments

Patch